All downloads are free. The search and download functionality uses the official Maven repository.

com.sequoiadb.hadoop.io.SequoiadbWriter Maven / Gradle / Ivy

The newest version!
package com.sequoiadb.hadoop.io;


import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.bson.BSONObject;
import org.bson.BasicBSONObject;
import org.bson.types.ObjectId;

import com.sequoiadb.base.CollectionSpace;
import com.sequoiadb.base.DBCollection;
import com.sequoiadb.base.Sequoiadb;
import com.sequoiadb.hadoop.util.SdbConnAddr;

/**
 * Hadoop {@link RecordWriter} that stores every record handed to it in a
 * SequoiaDB collection.
 *
 * <p>Two write modes are supported, selected by the {@code writeType}
 * constructor argument (compared case-insensitively):
 * <ul>
 *   <li>{@code "bulkinsert"} - records are buffered and sent with
 *       {@code DBCollection.bulkInsert} once the buffer holds {@code bulkNum}
 *       records; whatever is still buffered is sent by {@link #close}.</li>
 *   <li>{@code "upsert"} - every record is sent immediately with
 *       {@code DBCollection.upsert}, matched on its {@code _id}. This is also
 *       the fallback for any unrecognised (or {@code null}) write type.</li>
 * </ul>
 *
 * <p>The instance owns its database connection; callers must invoke
 * {@link #close} to flush pending records and release it.
 *
 * @param <K> key type; a non-null key that is not a {@link NullWritable}
 *            becomes the record's {@code _id}
 * @param <V> value type; converted to a {@link BSONObject} before writing
 */
public class SequoiadbWriter<K, V> extends RecordWriter<K, V> {
	private static final Log log = LogFactory.getLog(SequoiadbWriter.class);

	private static final String WRITE_TYPE_BULKINSERT = "bulkinsert";
	private static final String WRITE_TYPE_UPSERT = "upsert";
	private static final String ID_FIELD = "_id";

	/** Buffer size used when the caller does not supply a bulk size. */
	private static final int DEFAULT_BULK_NUM = 512;

	/**
	 * Small helper around a {@link BSONObject} that separates the {@code _id}
	 * field from the remaining fields.
	 */
	static class BsonDeal {
		private BSONObject bson;

		public BsonDeal(BSONObject _bson) {
			this.bson = _bson;
		}

		public void setBson(BSONObject _bson) {
			this.bson = _bson;
		}

		public BSONObject getBson() {
			return this.bson;
		}

		/**
		 * Copies the wrapped object into a new {@link BasicBSONObject}, leaving
		 * out every key that equals {@code "_id"} ignoring case.
		 *
		 * @return a new object holding all non-id fields
		 */
		public BSONObject getBsonWithoutId() {
			BSONObject newBson = new BasicBSONObject();
			for (Object rawKey : this.bson.keySet()) {
				String key = rawKey.toString();
				if (!ID_FIELD.equalsIgnoreCase(key)) {
					newBson.put(key, this.bson.get(key));
				}
			}
			return newBson;
		}

		/**
		 * Looks up the value stored under {@code "_id"}.
		 *
		 * @return the id, or {@code null} when no key equals {@code "_id"}
		 *         (ignoring case)
		 * @throws ClassCastException if the stored id is not an {@link ObjectId}
		 */
		public ObjectId getBsonId() {
			for (Object rawKey : this.bson.keySet()) {
				if (ID_FIELD.equalsIgnoreCase(rawKey.toString())) {
					return (ObjectId) this.bson.get(ID_FIELD);
				}
			}
			return null;
		}
	}

	private final DBCollection dbCollection;
	private final Sequoiadb sequoiadb;
	private final List<BSONObject> lstBsonBuffer;
	private final int bulkNum;
	private final String writeType;

	/**
	 * Creates a writer with an explicit bulk size (used by
	 * SequoiadbOutputFormat).
	 *
	 * <p>The collection space and the collection are created when they do not
	 * exist yet. Arguments are validated before the connection is opened, and
	 * the connection is released again if opening the collection fails.
	 *
	 * @param collectionSpaceName name of the target collection space
	 * @param collectionName      name of the target collection
	 * @param sdbConnAddr         host and port of the SequoiaDB node
	 * @param user                user name for the connection
	 * @param passwd              password for the connection
	 * @param bulkNum             number of records buffered before a bulk
	 *                            insert is sent
	 * @param writeType           {@code "bulkinsert"} or {@code "upsert"}; any
	 *                            other value falls back to {@code "upsert"}
	 * @throws IllegalArgumentException if a name is {@code null} or
	 *                                  {@code bulkNum} is negative
	 */
	public SequoiadbWriter(String collectionSpaceName, String collectionName,
			SdbConnAddr sdbConnAddr, String user, String passwd, int bulkNum, String writeType) {
		super();
		if (collectionSpaceName == null) {
			throw new IllegalArgumentException(" the output collection space is null");
		}
		if (collectionName == null) {
			throw new IllegalArgumentException(" the output collection is null");
		}
		if (bulkNum < 0) {
			throw new IllegalArgumentException("Illegal bulkNum: " + bulkNum);
		}

		this.bulkNum = bulkNum;
		this.lstBsonBuffer = new ArrayList<BSONObject>(bulkNum);
		this.writeType = resolveWriteType(writeType);
		log.debug("writeType = " + this.writeType);

		Sequoiadb connection = new Sequoiadb(sdbConnAddr.getHost(),
				sdbConnAddr.getPort(), user, passwd);
		DBCollection collection;
		try {
			collection = openCollection(connection, collectionSpaceName, collectionName);
		} catch (RuntimeException e) {
			// Do not leak the connection when the collection cannot be opened.
			disconnectQuietly(connection);
			throw e;
		}
		this.sequoiadb = connection;
		this.dbCollection = collection;
	}

	/**
	 * Creates a writer without an explicit bulk size (used by
	 * SequoiadbMergeFormat). Behaves like the main constructor with a bulk
	 * size of {@link #DEFAULT_BULK_NUM}, so the "bulkinsert" mode has a usable
	 * buffer here as well.
	 *
	 * @param collectionSpaceName name of the target collection space
	 * @param collectionName      name of the target collection
	 * @param sdbConnAddr         host and port of the SequoiaDB node
	 * @param user                user name for the connection
	 * @param passwd              password for the connection
	 * @param writeType           {@code "bulkinsert"} or {@code "upsert"}; any
	 *                            other value falls back to {@code "upsert"}
	 * @throws IllegalArgumentException if a name is {@code null}
	 */
	public SequoiadbWriter(String collectionSpaceName, String collectionName,
			SdbConnAddr sdbConnAddr, String user, String passwd, String writeType) {
		this(collectionSpaceName, collectionName, sdbConnAddr, user, passwd,
				DEFAULT_BULK_NUM, writeType);
	}

	/** Returns the requested write type when it is supported, otherwise the "upsert" default. */
	private static String resolveWriteType(String requested) {
		if (WRITE_TYPE_BULKINSERT.equalsIgnoreCase(requested)
				|| WRITE_TYPE_UPSERT.equalsIgnoreCase(requested)) {
			return requested;
		}
		log.warn("writeType != bulkinsert and writeType != upsert,use default value");
		return WRITE_TYPE_UPSERT;
	}

	/** Fetches the collection, creating the collection space and/or the collection when missing. */
	private static DBCollection openCollection(Sequoiadb connection,
			String collectionSpaceName, String collectionName) {
		CollectionSpace space;
		if (connection.isCollectionSpaceExist(collectionSpaceName)) {
			space = connection.getCollectionSpace(collectionSpaceName);
		} else {
			// Keep the handle of the newly created space; it is needed below.
			space = connection.createCollectionSpace(collectionSpaceName);
		}

		if (space.isCollectionExist(collectionName)) {
			return space.getCollection(collectionName);
		}
		return space.createCollection(collectionName);
	}

	/** Disconnects while suppressing driver errors, so an earlier failure is not masked. */
	private static void disconnectQuietly(Sequoiadb connection) {
		try {
			connection.disconnect();
		} catch (RuntimeException e) {
			log.warn("Failed to disconnect from SequoiaDB", e);
		}
	}

	private boolean isBulkInsert() {
		return WRITE_TYPE_BULKINSERT.equalsIgnoreCase(this.writeType);
	}

	/** Sends all buffered records in one bulk insert and empties the buffer; no-op when empty. */
	private void flushBuffer() {
		if (lstBsonBuffer.isEmpty()) {
			return;
		}
		// NOTE(review): FLG_INSERT_CONTONDUP presumably makes the insert continue
		// past duplicate-key errors - confirm against the driver documentation.
		this.dbCollection.bulkInsert(lstBsonBuffer, DBCollection.FLG_INSERT_CONTONDUP);
		lstBsonBuffer.clear();
	}

	/**
	 * Flushes any records still buffered in "bulkinsert" mode and disconnects
	 * from the database. The disconnect happens even when the flush fails.
	 */
	@Override
	public void close(TaskAttemptContext arg0) throws IOException,
			InterruptedException {
		try {
			if (isBulkInsert()) {
				flushBuffer();
			}
		} finally {
			this.sequoiadb.disconnect();
		}
	}

	/**
	 * Writes one record.
	 *
	 * <p>The value is converted to BSON. A non-null key that is not a
	 * {@link NullWritable} is stored as the record's {@code _id}: an
	 * {@link ObjectId} key is used as is, any other key is parsed from its
	 * {@code toString()} form. Note that the {@code _id} is put into the BSON
	 * object obtained from the value, i.e. that object is modified.
	 *
	 * @param key   record key, may be {@code null}
	 * @param value record value, must be convertible to BSON
	 * @throws IOException if the value is {@code null} or cannot be converted
	 */
	@Override
	public void write(K key, V value) throws IOException, InterruptedException {
		BSONObject bson = toBson(value);

		if (key != null && !(key instanceof NullWritable)) {
			if (key instanceof ObjectId) {
				bson.put(ID_FIELD, key);
			} else {
				bson.put(ID_FIELD, new ObjectId(key.toString()));
			}
		}

		if (isBulkInsert()) {
			// Send the full buffer first, then start the next batch with this record.
			if (lstBsonBuffer.size() >= bulkNum) {
				flushBuffer();
			}
			lstBsonBuffer.add(bson);
		} else {
			upsert(bson);
		}
	}

	/** Converts a record value to BSON, failing with an IOException instead of yielding null. */
	private BSONObject toBson(V value) throws IOException {
		if (value == null) {
			throw new IOException("Cannot write a null value to SequoiaDB");
		}

		BSONObject bson;
		if (value instanceof BSONWritable) {
			bson = ((BSONWritable) value).getBson();
		} else if (value instanceof BSONObject) {
			bson = (BSONObject) value;
		} else {
			try {
				bson = BasicBSONObject.typeToBson(value);
			} catch (Exception e) {
				throw new IOException("Failed convert value to bson", e);
			}
		}

		if (bson == null) {
			throw new IOException("Value of type " + value.getClass().getName()
					+ " produced no bson");
		}
		return bson;
	}

	/** Upserts one record: matches on its _id and $set-s all remaining fields. */
	private void upsert(BSONObject bson) {
		BsonDeal bsonDeal = new BsonDeal(bson);

		ObjectId id = bsonDeal.getBsonId();
		if (id == null) {
			// Never match on a null _id: give id-less records a fresh id so each
			// one is stored as its own record.
			id = new ObjectId();
		}

		BSONObject bson_query = new BasicBSONObject();
		bson_query.put(ID_FIELD, id);

		BSONObject bson_rule = new BasicBSONObject();
		bson_rule.put("$set", bsonDeal.getBsonWithoutId());

		this.dbCollection.upsert(bson_query, bson_rule, null);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy