All downloads are free. The search and download functionality uses the official Maven repository.

brickhouse.udf.sketch.EstimatedReachUDF Maven / Gradle / Ivy

The newest version!
package brickhouse.udf.sketch;
/**
 * Copyright 2012 Klout, Inc
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **/


import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.log4j.Logger;

import brickhouse.analytics.uniques.SketchSet;

/**
 *  Interpret a list of strings as a sketch_set
 *  and return an estimated reach number
 *
 */
@Description(name="estimated_reach",
    value = "_FUNC_(x) - Estimate reach from a  sketch set of Strings. "
)
public class EstimatedReachUDF extends GenericUDF {
	private static final Logger LOG = Logger.getLogger( EstimatedReachUDF.class);
	
	private ListObjectInspector listInspector;
	private PrimitiveObjectInspector elemInspector;
	private PrimitiveCategory elemCategory;
	private IntObjectInspector lengthInspector;
	

	@Override
	public Object evaluate(DeferredObject[] arg0) throws HiveException {
		Object listObj = arg0[0].get();
		int maxItems = SketchSet.DEFAULT_MAX_ITEMS;
		if( arg0.length > 1) {
			maxItems = lengthInspector.get( arg0[1].get());
		}
		
		int listLen = listInspector.getListLength( listObj);
		if( listLen < maxItems ) {
			return (long)listLen;
		}
		if( listLen > maxItems) {
			LOG.warn( "estimated_reach: List length " + listLen + " is greater than sketch set Max items " + maxItems);
		}
		Object uninspMax = listInspector.getListElement( listObj, maxItems -1);
		switch( this.elemCategory) {
		case STRING :
			StringObjectInspector strInspector = (StringObjectInspector) elemInspector;
			String lastItem = strInspector.getPrimitiveJavaObject(uninspMax);
			double reach = SketchSet.EstimatedReach( lastItem, maxItems);
			if( reach > listLen)
			   return (long)(reach);
			else 
			   return (long)listLen;
		case LONG :
			LongObjectInspector longInspector = (LongObjectInspector) elemInspector;
			long lastHash = longInspector.get(uninspMax);
			double reachHash = SketchSet.EstimatedReach( lastHash, maxItems);
			if( reachHash > listLen)
			   return (long)(reachHash);
			else 
			   return (long)listLen;
		 default:
			 /// should not happen
			throw new HiveException("Unexpected category type");
		} 
	}

	@Override
	public String getDisplayString(String[] arg0) {
		StringBuilder sb = new StringBuilder("estimated_reach( ");
		for(int i=0; i 1) {
			if( !(arg0[1] instanceof IntObjectInspector)) {
			  throw new UDFArgumentException("estimated_reach takes an array of strings or an array of hashes, and an optional sketch size");
				
			}
			this.lengthInspector = (IntObjectInspector) arg0[1];
		}
		
		return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
	}
	

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy