All downloads are free. Search and download functionality uses the official Maven repository.

com.yahoo.sketches.hive.tuple.DataToDoubleSummarySketchUDAF Maven / Gradle / Ivy

There is a newer version: 0.13.0
Show newest version
/*
 * Copyright 2016, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.hive.tuple;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

import com.yahoo.sketches.tuple.DoubleSummary;
import com.yahoo.sketches.tuple.DoubleSummaryFactory;
import com.yahoo.sketches.tuple.SummaryFactory;

/**
 * Example of a concrete UDAF built on top of the abstract DataToSketchUDAF for the case where
 * no extra arguments are needed. The same functionality is included in
 * DataToDoubleSummaryWithModeSketchUDAF with the default summary mode of Sum, but that
 * implementation is more complex because of the extra argument.
 */

@Description(
  name = "DataToDoubleSummarySketch",
  value = "_FUNC_(key, double value, nominal number of entries, sampling probability)",
  extended = "Returns a Sketch as a binary blob that can be operated on by other"
    + " tuple sketch related functions. The nominal number of entries is optional, must be a power"
    + " of 2 and controls the relative error expected from the sketch."
    + " A number of 16384 can be expected to yield errors of roughly +-1.5% in the estimation of"
    + " uniques. The default number is defined in the sketches-core library, and at the time of this"
    + " writing was 4096 (about 3% error)."
    + " The sampling probability is optional and must be from 0 to 1. The default is 1 (no sampling)")
public class DataToDoubleSummarySketchUDAF extends DataToSketchUDAF {

  /** Zero-based position of the "double value" argument in the function call. */
  private static final int VALUE_ARG_INDEX = 1;

  /**
   * Checks the call arguments and hands back the evaluator for this function.
   *
   * <p>The parent implementation is invoked first and its result is discarded (presumably it is
   * called only for its checks of the common arguments; confirm against DataToSketchUDAF).
   * Afterwards the argument at index 1 is required to be a primitive of category DOUBLE.
   *
   * @param info describes the arguments the function was called with
   * @return the evaluator produced by {@link #createEvaluator()}
   * @throws SemanticException declared by the overridden method; may be raised by the parent
   *     call or by the value-type validation
   */
  @Override
  public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info) throws SemanticException {
    // Run the parent first so its checks happen before the value argument is looked at.
    super.getEvaluator(info);
    final ObjectInspector valueInspector = info.getParameterObjectInspectors()[VALUE_ARG_INDEX];
    ObjectInspectorValidator.validateGivenPrimitiveCategory(
        valueInspector, VALUE_ARG_INDEX, PrimitiveCategory.DOUBLE);
    return createEvaluator();
  }

  /**
   * Creates the evaluator used by this UDAF.
   *
   * @return a new {@link DataToDoubleSummarySketchEvaluator}
   */
  @Override
  public GenericUDAFEvaluator createEvaluator() {
    return new DataToDoubleSummarySketchEvaluator();
  }

  /**
   * Evaluator that always supplies a {@link DoubleSummaryFactory}, both when iterating over
   * input data and when merging.
   */
  static class DataToDoubleSummarySketchEvaluator extends DataToSketchEvaluator {

    /**
     * Supplies the summary factory for the iterate step.
     *
     * @param data not used by this implementation
     * @return a new {@link DoubleSummaryFactory}
     */
    @Override
    protected SummaryFactory getSummaryFactoryForIterate(final Object[] data) {
      return new DoubleSummaryFactory();
    }

    /**
     * Supplies the summary factory for the merge step.
     *
     * @param data not used by this implementation
     * @return a new {@link DoubleSummaryFactory}
     */
    @Override
    protected SummaryFactory getSummaryFactoryForMerge(final Object data) {
      return new DoubleSummaryFactory();
    }

  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy