All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.sketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF Maven / Gradle / Ivy

There is a newer version: 0.13.0
Show newest version
/*
 * Copyright 2016, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.hive.frequencies;

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BytesWritable;

import com.yahoo.memory.Memory;
import com.yahoo.sketches.ArrayOfStringsSerDe;
import com.yahoo.sketches.frequencies.ErrorType;
import com.yahoo.sketches.frequencies.ItemsSketch;

@Description(name = "GetFrequentItems", value = "_FUNC_(sketch, errorType) - "
    + "Returns a list of frequent items in descending order by estimated frequency."
    + " Error type is optional and must be one of the following: "
    + "NO_FALSE_POSITIVES (default) or NO_FALSE_NEGATIVES.")
public class GetFrequentItemsFromStringsSketchUDTF extends GenericUDTF {

  PrimitiveObjectInspector inputObjectInspector;
  PrimitiveObjectInspector errorTypeObjectInspector;

  @Override
  public StructObjectInspector initialize(final ObjectInspector[] inspectors) throws UDFArgumentException {
    if (inspectors.length != 1 && inspectors.length != 2) {
      throw new UDFArgumentException("One or two arguments expected");
    }

    if (inspectors[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw new UDFArgumentTypeException(0, "Primitive argument expected, but "
          + inspectors[0].getCategory().name() + " was recieved");
    }
    inputObjectInspector = (PrimitiveObjectInspector) inspectors[0];
    if (inputObjectInspector.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.BINARY) {
      throw new UDFArgumentTypeException(0, "Binary value expected as the first argument, but "
          + inputObjectInspector.getPrimitiveCategory().name() + " was recieved");
    }

    if (inspectors.length > 1) {
      if (inspectors[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(1, "Primitive argument expected, but "
            + inspectors[1].getCategory().name() + " was recieved");
      }
      errorTypeObjectInspector = (PrimitiveObjectInspector) inspectors[1];
      if (errorTypeObjectInspector.getPrimitiveCategory()
          != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
        throw new UDFArgumentTypeException(1, "String value expected as the first argument, but "
            + errorTypeObjectInspector.getPrimitiveCategory().name() + " was recieved");
      }
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList("item", "estimate", "lower_bound", "upper_bound"),
      Arrays.asList(
          PrimitiveObjectInspectorFactory.javaStringObjectInspector,
          PrimitiveObjectInspectorFactory.javaLongObjectInspector,
          PrimitiveObjectInspectorFactory.javaLongObjectInspector,
          PrimitiveObjectInspectorFactory.javaLongObjectInspector
      )
    );
  }

  @Override
  public void process(final Object[] data) throws HiveException {
    if (data == null || data[0] == null) { return; }
    final BytesWritable serializedSketch =
        (BytesWritable) inputObjectInspector.getPrimitiveWritableObject(data[0]);
    final ItemsSketch sketch = ItemsSketch.getInstance(
        Memory.wrap(serializedSketch.getBytes()), new ArrayOfStringsSerDe());
    ErrorType errorType = ErrorType.NO_FALSE_POSITIVES;
    if (data.length > 1) {
      errorType = ErrorType.valueOf((String) errorTypeObjectInspector.getPrimitiveJavaObject(data[1]));
    }
    final ItemsSketch.Row[] result = sketch.getFrequentItems(errorType);
    for (int i = 0; i < result.length; i++) {
      forward(new Object[] {
        result[i].getItem(), result[i].getEstimate(), result[i].getLowerBound(), result[i].getUpperBound()
      });
    }
  }

  @Override
  public void close() throws HiveException {
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy