// brickhouse.hbase.BatchPutUDAF -- recovered from a Maven-repository viewer page.
// The surrounding page text ("All Downloads are FREE...", "The newest version!")
// was viewer boilerplate, not part of the original source file.
package brickhouse.hbase;
/**
 * Copyright 2012 Klout, Inc
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **/

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.log4j.Logger;

/**
 *   Write to HBase by doing bulk Puts from an aggregate function call.
 *   (Comment was garbled by extraction: "bulk &lt;Put&gt;s" lost its generic-looking
 *   token; this UDAF performs puts, per the _FUNC_ description below.)
 */

@Description(name="hbase_batch_put",
value = "_FUNC_(config_map, key, value) - Perform batch HBase updates of a table " 
)
public class BatchPutUDAF extends AbstractGenericUDAFResolver {
	private static final Logger LOG = Logger.getLogger( BatchPutUDAF.class);
	


	@Override
	public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
			throws SemanticException {
		for(int i=0; i configMap;
		
		private HTable table;



		public ObjectInspector init(Mode m, ObjectInspector[] parameters)
				throws HiveException {
			super.init(m, parameters);
			// init output object inspectors
			///  input will be key, value and batch size
			LOG.info(" Init mode = " + m );
			System.out.println(" Init mode = " + m );
			System.out.println(" parameters =  = " + parameters + " Length = " + parameters.length );
			configMap = new HashMap();
	        for( int k=0; k< parameters.length; ++k) {
	        	LOG.info( "Param " + k + " is " + parameters[k]);
	        	System.out.println( "Param " + k + " is " + parameters[k]);
	        }
	        
			if (m == Mode.PARTIAL1 || m == Mode.COMPLETE ) {
				configMap = HTableFactory.getConfigFromConstMapInspector(parameters[0]);
				HTableFactory.checkConfig( configMap);
				
				
				inputKeyOI = (PrimitiveObjectInspector) parameters[1];
				inputValOI = (PrimitiveObjectInspector) parameters[2];
				
				
				try {
					LOG.info(" Initializing HTable ");
					table = HTableFactory.getHTable( configMap);
					
					if(configMap.containsKey(BATCH_SIZE_TAG)) {
						batchSize = Integer.parseInt( configMap.get( BATCH_SIZE_TAG));
					}
				} catch (IOException e) {
					throw new HiveException(e);
				}
			} else {
			  listKVOI = (StandardListObjectInspector) parameters[0];
				
			}
			
			if( m == Mode.PARTIAL1 || m  == Mode.PARTIAL2) {
			   return ObjectInspectorFactory
						.getStandardListObjectInspector(
								ObjectInspectorFactory.getStandardListObjectInspector(
										PrimitiveObjectInspectorFactory.javaStringObjectInspector ) );
			} else {
				/// Otherwise return a message
				return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
			}
		}

		@Override
		public AggregationBuffer getNewAggregationBuffer() throws HiveException {
			PutBuffer buff= new PutBuffer();
			reset(buff);
			return buff;
		}

		@Override
		public void iterate(AggregationBuffer agg, Object[] parameters)
				throws HiveException {
			String key = getByteString( parameters[1], inputKeyOI);
			String val = getByteString( parameters[2], inputValOI);
			
			PutBuffer kvBuff = (PutBuffer) agg;
			kvBuff.addKeyValue( key,val);

			if(kvBuff.putList.size() >= batchSize) {
				batchUpdate( kvBuff, false);
			}
		}
		
		
		/**
		 * 
		 * @param obj
		 * @param objInsp
		 * @return
		 */
		private String getByteString( Object obj, PrimitiveObjectInspector objInsp) {
		    switch( objInsp.getPrimitiveCategory() ) {
		    case STRING : 
		        StringObjectInspector strInspector = (StringObjectInspector) objInsp;
		        return strInspector.getPrimitiveJavaObject(obj);
		    case BINARY : 
		        BinaryObjectInspector binInspector = (BinaryObjectInspector) objInsp;
		        return new String(binInspector.getPrimitiveJavaObject( obj));
		    /// XXX TODO interpret other types, like ints or doubled 
		     default :
		        return null; 
		    }
		}
		
		protected void batchUpdate( PutBuffer  kvBuff, boolean flushCommits) throws HiveException { 
			try {
				
				HTable htable = HTableFactory.getHTable(configMap);
				
				htable.put( kvBuff.putList);
				if(flushCommits) 
				   htable.flushCommits();
				numPutRecords += kvBuff.putList.size();
				if(kvBuff.putList.size() > 0)
				  LOG.info(" Doing Batch Put " + kvBuff.putList.size() + " records; Total put records = " + numPutRecords + " ; Start = " + (new String(kvBuff.putList.get(0).getRow()))  + " ; End = " + ( new String( kvBuff.putList.get( kvBuff.putList.size()-1).getRow())));
				else
					LOG.info( " Doing Batch Put with ZERO 0 records");
				kvBuff.putList.clear();
				
				
			} catch (IOException e) {
				throw new HiveException(e);
			}
		}

		@Override
		public void merge(AggregationBuffer agg, Object partial)
				throws HiveException {
			PutBuffer myagg = (PutBuffer) agg;
			List partialResult = (List)this.listKVOI.getList(partial);
			ListObjectInspector subListOI = (ListObjectInspector) listKVOI.getListElementObjectInspector();
		
			List first = subListOI.getList( partialResult.get(0));
			String tableName = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(0));
			configMap.put( HTableFactory.TABLE_NAME_TAG, tableName);
			String zookeeper = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(1));
			configMap.put( HTableFactory.ZOOKEEPER_QUORUM_TAG, zookeeper);
			String family = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(2));
			configMap.put( HTableFactory.FAMILY_TAG, family);
			String qualifier = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(3));
			configMap.put( HTableFactory.QUALIFIER_TAG, qualifier);
			//// Include arbitrary configurations, by adding strings of the form k=v
			for(int j=4; j < first.size(); ++j ) {
				String kvStr =  ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(first.get(j));
				String[] kvArr = kvStr.split("=");
				if(kvArr.length == 2 ) {
					configMap.put( kvArr[0], kvArr[1]);
				}
			}
			
			for(int i=2; i< partialResult.size(); ++i) {
				
			   List kvList = subListOI.getList( partialResult.get(i));
			   String key = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(kvList.get(0));
			   String val = ((StringObjectInspector)(subListOI.getListElementObjectInspector())).getPrimitiveJavaObject(kvList.get(1));
			   
			   myagg.addKeyValue( key, val);
			   
			}
			
			if(myagg.putList.size() >= batchSize) {
				batchUpdate( myagg, false);
			}
		}

		@Override
		public void reset(AggregationBuffer buff) throws HiveException {
			PutBuffer putBuffer = (PutBuffer) buff;
			putBuffer.reset();
		}

		@Override
		public Object terminate(AggregationBuffer agg) throws HiveException {
			PutBuffer myagg = (PutBuffer) agg;
			batchUpdate( myagg, true);
			return "Finished Batch updates ; Num Puts = " + numPutRecords ; 

		}


		@Override
		public Object terminatePartial(AggregationBuffer agg) throws HiveException {
			PutBuffer myagg = (PutBuffer) agg;
			
			
			ArrayList> ret = new ArrayList>();
			ArrayList tname = new ArrayList();
			tname.add( configMap.get( HTableFactory.TABLE_NAME_TAG));
			tname.add( configMap.get( HTableFactory.ZOOKEEPER_QUORUM_TAG));
			tname.add( configMap.get( HTableFactory.FAMILY_TAG) );
			tname.add( configMap.get( HTableFactory.QUALIFIER_TAG ));
			
			for( Entry entry : configMap.entrySet() ) {
				if(!entry.getKey().equals( HTableFactory.TABLE_NAME_TAG)
						&& !entry.getKey().equals( HTableFactory.ZOOKEEPER_QUORUM_TAG )
						&& !entry.getKey().equals( HTableFactory.FAMILY_TAG )
						&& !entry.getKey().equals( HTableFactory.QUALIFIER_TAG ) ) {
					
					tname.add( entry.getKey() + "=" + entry.getValue());
				}
			}
			ret.add( tname);
			
			for(Put thePut : myagg.putList) {
				ArrayList kvList = new ArrayList();
				kvList.add( new String(thePut.getRow() )  );
			    Map> familyMap = thePut.getFamilyMap();
			    for( List innerList : familyMap.values() ) {
			    	for(KeyValue kv : innerList) {
			    		kvList.add( new String( kv.getValue() ));
			    	}
			    }
			    ret.add( kvList);
			}
			
			return ret;
		}
	}



}