All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sequoiadb.hadoop.split.SdbSplitFactory Maven / Gradle / Ivy

The newest version!
package com.sequoiadb.hadoop.split;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.bson.BSONObject;
import org.bson.BasicBSONObject;
import org.bson.types.BasicBSONList;
import org.bson.util.JSON;

import com.sequoiadb.base.DBCollection;
import com.sequoiadb.base.DBCursor;
import com.sequoiadb.base.Sequoiadb;
import com.sequoiadb.exception.BaseException;
import com.sequoiadb.hadoop.util.SdbConnAddr;
import com.sequoiadb.hadoop.util.SequoiadbConfigUtil;

/**
 * 
 * 
 * @className閿涙瓔dbSplit
 * 
 * @author閿涳拷gaoshengjie
 * 
 * @createtime:2013楠烇拷2閺堬拷1閺冿拷娑撳﹤宕�0:46:36
 * 
 * @changetime:TODO
 * 
 * @version 1.0.0
 * 
 */
public class SdbSplitFactory {
	private static final Log log = LogFactory.getLog(SdbSplitFactory.class);

	/**
	 * 
	 * @content:depend the method of scan ,return different split;
	 * 
	 * @param jobContext
	 * @return
	 * 
	 * @exception
	 * @since 1.0.0
	 */
	public static List getSplits(JobContext jobContext) {

		Configuration conf = jobContext.getConfiguration();

		String urls = SequoiadbConfigUtil.getInputURL(conf);
		SdbConnAddr[] sdbConnAddrs = SequoiadbConfigUtil.getAddrList(urls);
		
		String user = SequoiadbConfigUtil.getInputUser(conf);
		String passwd = SequoiadbConfigUtil.getInputPasswd(conf);
		
		String preferedInstance=SequoiadbConfigUtil.getPreferenceInstance(conf);
		String collectionName = SequoiadbConfigUtil.getInCollectionName(conf);
		String collectionSpaceName = SequoiadbConfigUtil.getInCollectionSpaceName(conf);
		String queryStr = SequoiadbConfigUtil.getQueryString(conf);
		String selectorStr = SequoiadbConfigUtil.getSelectorString(conf);
		log.debug("test sdbConnAddrs whether it can connect or not ");
		Sequoiadb sdb = null;
		BaseException lastException = null;
		boolean flag = false;
		for (int i = 0; i < sdbConnAddrs.length; i++) {
			try {
				sdb = new Sequoiadb(sdbConnAddrs[i].getHost(),
						sdbConnAddrs[i].getPort(), user, passwd);
				
				//check preferedInstance's type
				if (preferedInstance.equalsIgnoreCase("slave")){
					preferedInstance = "S";
				}else if(preferedInstance.equalsIgnoreCase("master")){
					preferedInstance = "M";
				}else if(preferedInstance.equalsIgnoreCase("anyone")){
					preferedInstance = "A";
				}else if(preferedInstance.equalsIgnoreCase("node1") ||
						 preferedInstance.equalsIgnoreCase("node2") ||
						 preferedInstance.equalsIgnoreCase("node3") ||
						 preferedInstance.equalsIgnoreCase("node4") ||
						 preferedInstance.equalsIgnoreCase("node5") ||
						 preferedInstance.equalsIgnoreCase("node6") ||
						 preferedInstance.equalsIgnoreCase("node7"))
				{
					preferedInstance = preferedInstance.substring(4, 5);
				}else{
					log.warn("conf set 'preferedInstance' = " + preferedInstance + ", this type is undefine, use preferedInstance = 'slave'");
					preferedInstance = "S";
				}
				sdb.setSessionAttr(new BasicBSONObject("PreferedInstance",preferedInstance));
				break;
			} catch (BaseException e) {
				lastException = e;
			}
		}
		if (sdb == null) {
			throw lastException;
		}

		log.debug("start get data blocks");

		if (collectionSpaceName == null || collectionName == null) {
			throw new IllegalArgumentException(
					"collectionSpaceName and collectionName must have value");
		}
		
		if(!sdb.isCollectionSpaceExist(collectionSpaceName)){
			throw new IllegalArgumentException(
					"collectionSpaceName not exist");
		}
		
		if(!sdb.getCollectionSpace(collectionSpaceName).isCollectionExist(collectionName)){
			throw new IllegalArgumentException(
					"collectionName not exist");
		}
		DBCollection collection = sdb.getCollectionSpace(collectionSpaceName).getCollection(collectionName);
    	BSONObject queryBson = null;
    	BSONObject selectorBson = null;
    	BSONObject orderbyBson = null;	
    	//婵″倹鐏夐弽鐓庣础閺堝妫舵0妯诲閸戝搫绱撶敮闈╃礉
    	if ( queryStr != null){  
    		try {
    			queryBson = (BSONObject) JSON.parse( queryStr );
			} catch (Exception e) {
				queryBson = null;
				log.warn("query string is error");
//				throw new IllegalArgumentException(
//						"sequoiadb.query.json is wrong");
			}
    	}
    	if ( selectorStr != null){ 
    		try {
    			selectorBson = (BSONObject) JSON.parse( selectorStr );
    			selectorBson.put("_id", null);
			} catch (Exception e) {
				selectorBson = null;
				log.warn("selector string is error");
//				throw new IllegalArgumentException(
//						"sequoiadb.selector.json is wrong");
			}
    	}
    	log.debug("explain");
    	DBCursor explainCurl=collection.explain(queryBson, selectorBson, null, null, 0, 0, 0,new BasicBSONObject("Run",false));
    	Set subCls=new HashSet();
    	while(explainCurl.hasNext()){
    		BSONObject explainBson=explainCurl.getNext();	
    		
    		BasicBSONList bsonlist = (BasicBSONList) explainBson.get("SubCollections");
    		if(bsonlist == null)
    		{
    			subCls.add((String)explainBson.get("Name"));
    		}else
    		{
    			for(int i=0;i splits = new ArrayList();
    	Iterator clName=subCls.iterator();
    	while(clName.hasNext()){
    		String temp=clName.next();
    		String[] items=temp.split("\\.");
    		collection = sdb.getCollectionSpace(items[0]).getCollection(items[1]);
    		collect(collection, splits, items[0], items[1]);
    	}   	

		log.debug("inputsplit  size is"+splits.size());		
		return splits;
	}
	
	private static void collect(DBCollection collection,List splits, String collectionSpaceName, String collectionName){	
		DBCursor cursor = collection.getQueryMeta(null, null, null, 0, -1, 0);
		while (cursor.hasNext()) {
			BSONObject obj = cursor.getNext();
			log.debug("meta record:" + obj.toString());

			String hostname = (String) obj.get("HostName");
			int port = Integer.parseInt((String) obj.get("ServiceName"));
			String scanType = (String) obj.get("ScanType");

			// TODO
			if ("ixscan".equals(scanType)) {
				String indexName = (String) obj.get("IndexName");
				int indexLID = (Integer) obj.get("IndexLID");
				int direction = (Integer) obj.get("Direction");

				BasicBSONList indexBlockList = (BasicBSONList) obj
						.get("Indexblocks");

				for (Object objBlock : indexBlockList) {
					if (objBlock instanceof BSONObject) {
						BSONObject indexBlock = (BSONObject) objBlock;
						BSONObject startKey = (BSONObject) indexBlock
								.get("StartKey");
						BSONObject endKey = (BSONObject) indexBlock
								.get("EndKey");
					}
				}

			}

			if ("tbscan".equals(scanType)) {
				BasicBSONList blockList = (BasicBSONList) obj.get("Datablocks");
				
				int i = 0;
				for (Object objBlock : blockList) {
					if (objBlock instanceof Integer) {
						Integer blockId = (Integer) objBlock;
						splits.add(new SdbBlockSplit(new SdbConnAddr(hostname,
								port), scanType, blockId, collectionSpaceName, collectionName));
					}
				}
			}

		}
	}
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy