// brickhouse.udf.sketch.EstimatedReachUDF
// Part of Brickhouse: Extensions of Hive for the Data Developer.
package brickhouse.udf.sketch;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.log4j.Logger;
import brickhouse.analytics.uniques.SketchSet;
/**
* Interpret a list of strings as a sketch_set
* and return an estimated reach number
*
*/
@Description(name="estimated_reach",
value = "_FUNC_(x) - Estimate reach from a sketch set of Strings. "
)
public class EstimatedReachUDF extends GenericUDF {
private static final Logger LOG = Logger.getLogger( EstimatedReachUDF.class);
private ListObjectInspector listInspector;
private PrimitiveObjectInspector elemInspector;
private PrimitiveCategory elemCategory;
private IntObjectInspector lengthInspector;
@Override
public Object evaluate(DeferredObject[] arg0) throws HiveException {
Object listObj = arg0[0].get();
int maxItems = SketchSet.DEFAULT_MAX_ITEMS;
if( arg0.length > 1) {
maxItems = lengthInspector.get( arg0[1].get());
}
int listLen = listInspector.getListLength( listObj);
if( listLen < maxItems ) {
return (long)listLen;
}
if( listLen > maxItems) {
LOG.warn( "estimated_reach: List length " + listLen + " is greater than sketch set Max items " + maxItems);
}
Object uninspMax = listInspector.getListElement( listObj, maxItems -1);
switch( this.elemCategory) {
case STRING :
StringObjectInspector strInspector = (StringObjectInspector) elemInspector;
String lastItem = strInspector.getPrimitiveJavaObject(uninspMax);
double reach = SketchSet.EstimatedReach( lastItem, maxItems);
return (long)(reach);
case LONG :
LongObjectInspector longInspector = (LongObjectInspector) elemInspector;
long lastHash = longInspector.get(uninspMax);
double reachHash = SketchSet.EstimatedReach( lastHash, maxItems);
return (long)(reachHash);
default:
/// should not happen
throw new HiveException("Unexpected category type");
}
}
@Override
public String getDisplayString(String[] arg0) {
StringBuilder sb = new StringBuilder("estimated_reach( ");
for(int i=0; i 1) {
if( !(arg0[1] instanceof IntObjectInspector)) {
throw new UDFArgumentException("estimated_reach takes an array of strings or an array of hashes, and an optional sketch size");
}
this.lengthInspector = (IntObjectInspector) arg0[1];
}
return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
}
}