brickhouse.hbase.BatchPutUDAF Maven / Gradle / Ivy
Extensions of Hive for the Data Developer
package brickhouse.hbase;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.log4j.Logger;
/**
* Write to HBase by doing bulk Puts from an aggregate function call.
*
*/
@Description(name="hbase_batch_put",
value = "_FUNC_(config_map, key, value) - Perform batch HBase updates of a table "
)
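/*
 * Usage sketch (illustrative, not from the original source): the UDAF is called from
 * Hive with a constant config map as its first argument. Table and column names below
 * are hypothetical, and the config keys are assumptions based on HTableFactory
 * ("table_name", "family", "qualifier", "hbase.zookeeper.quorum") plus the
 * "batch_size" key read in the evaluator below.
 *
 *   SELECT hbase_batch_put(
 *            map('table_name', 'user_scores',
 *                'family', 'c',
 *                'qualifier', 'score',
 *                'hbase.zookeeper.quorum', 'zk1,zk2,zk3',
 *                'batch_size', '5000'),
 *            user_id, cast(score AS string))
 *   FROM daily_scores
 *   GROUP BY grouping_key;
 */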
public class BatchPutUDAF extends AbstractGenericUDAFResolver {
private static final Logger LOG = Logger.getLogger( BatchPutUDAF.class);
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
throws SemanticException {
for(int i=0; i<parameters.length; ++i) {
    LOG.info(" BATCH PUT PARAMETERS : " + i + " -- " + parameters[i].getTypeName() + " ; " + parameters[i].getCategory());
}
return new BatchPutUDAFEvaluator();
}

public static class BatchPutUDAFEvaluator extends GenericUDAFEvaluator {

/// Aggregation buffer holding the Puts accumulated so far
public class PutBuffer implements AggregationBuffer {
    public List<Put> putList;

    public PutBuffer() {}

    public void reset() {
        putList = new ArrayList<Put>();
    }

    public void addKeyValue(String key, String val) throws HiveException {
        Put thePut = new Put(key.getBytes());
        thePut.add(getFamily(), getQualifier(), val.getBytes());
        putList.add(thePut);
    }
}

private byte[] getFamily() {
    String famStr = configMap.get(HTableFactory.FAMILY_TAG);
    return famStr.getBytes();
}

private byte[] getQualifier() {
    String qualStr = configMap.get(HTableFactory.QUALIFIER_TAG);
    return qualStr.getBytes();
}

private int batchSize = 10000;
private int numPutRecords = 0;

public static final String BATCH_SIZE_TAG = "batch_size";

// input ObjectInspectors for the key and value arguments
private PrimitiveObjectInspector inputKeyOI;
private PrimitiveObjectInspector inputValOI;
// ObjectInspector for the partial (list of string lists) aggregation transport
private StandardListObjectInspector listKVOI;
private Map<String,String> configMap;
private HTable table;
public ObjectInspector init(Mode m, ObjectInspector[] parameters)
throws HiveException {
super.init(m, parameters);
// init output object inspectors
/// input will be key, value and batch size
LOG.info(" Init mode = " + m );
System.out.println(" Init mode = " + m );
System.out.println(" parameters = = " + parameters + " Length = " + parameters.length );
configMap = new HashMap<String,String>();
for( int k=0; k< parameters.length; ++k) {
LOG.info( "Param " + k + " is " + parameters[k]);
System.out.println( "Param " + k + " is " + parameters[k]);
}
if (m == Mode.PARTIAL1 || m == Mode.COMPLETE ) {
configMap = HTableFactory.getConfigFromConstMapInspector(parameters[0]);
HTableFactory.checkConfig( configMap);
inputKeyOI = (PrimitiveObjectInspector) parameters[1];
inputValOI = (PrimitiveObjectInspector) parameters[2];
try {
LOG.info(" Initializing HTable ");
table = HTableFactory.getHTable( configMap);
if(configMap.containsKey(BATCH_SIZE_TAG)) {
batchSize = Integer.parseInt( configMap.get( BATCH_SIZE_TAG));
}
} catch (IOException e) {
throw new HiveException(e);
}
} else {
listKVOI = (StandardListObjectInspector) parameters[0];
}
if( m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
return ObjectInspectorFactory
.getStandardListObjectInspector(
ObjectInspectorFactory.getStandardListObjectInspector(
PrimitiveObjectInspectorFactory.javaStringObjectInspector ) );
} else {
/// Otherwise return a message
return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
}
}
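/// Note (editorial): for PARTIAL1/PARTIAL2 the buffered puts travel between map and
/// reduce tasks as a list of string lists, matching the ObjectInspector returned above;
/// for COMPLETE/FINAL the function returns a plain status string instead.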
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
PutBuffer buff= new PutBuffer();
reset(buff);
return buff;
}
@Override
public void iterate(AggregationBuffer agg, Object[] parameters)
throws HiveException {
String key = getByteString( parameters[1], inputKeyOI);
String val = getByteString( parameters[2], inputValOI);
PutBuffer kvBuff = (PutBuffer) agg;
kvBuff.addKeyValue( key,val);
if(kvBuff.putList.size() >= batchSize) {
batchUpdate( kvBuff, false);
}
}
/**
 * Convert a Hive primitive value (string or binary) into the String form
 * that is written to HBase.
 *
 * @param obj the raw value passed to the UDAF
 * @param objInsp the ObjectInspector describing obj
 * @return the value as a String, or null if the primitive type is not handled
 */
private String getByteString( Object obj, PrimitiveObjectInspector objInsp) {
switch( objInsp.getPrimitiveCategory() ) {
case STRING :
StringObjectInspector strInspector = (StringObjectInspector) objInsp;
return strInspector.getPrimitiveJavaObject(obj);
case BINARY :
BinaryObjectInspector binInspector = (BinaryObjectInspector) objInsp;
return new String(binInspector.getPrimitiveJavaObject( obj));
/// XXX TODO interpret other types, like ints or doubles (see the sketch after this method)
default :
return null;
}
}
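/// Sketch for the TODO above (an assumption, not the author's code): other primitive
/// categories could fall back to their Java object form, e.g.
///
///   case INT :
///   case LONG :
///   case DOUBLE :
///       Object prim = objInsp.getPrimitiveJavaObject( obj);
///       return prim == null ? null : prim.toString();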
protected void batchUpdate( PutBuffer kvBuff, boolean flushCommits) throws HiveException {
try {
HTable htable = HTableFactory.getHTable(configMap);
htable.put( kvBuff.putList);
if(flushCommits)
htable.flushCommits();
numPutRecords += kvBuff.putList.size();
if(kvBuff.putList.size() > 0)
LOG.info(" Doing Batch Put " + kvBuff.putList.size() + " records; Total put records = " + numPutRecords + " ; Start = " + (new String(kvBuff.putList.get(0).getRow())) + " ; End = " + ( new String( kvBuff.putList.get( kvBuff.putList.size()-1).getRow())));
else
LOG.info( " Doing Batch Put with ZERO 0 records");
kvBuff.putList.clear();
} catch (IOException e) {
throw new HiveException(e);
}
}
@Override
public void merge(AggregationBuffer agg, Object partial)
throws HiveException {
PutBuffer myagg = (PutBuffer) agg;
List