All downloads are FREE. The search and download functionality uses the official Maven repository.

com.antbrains.urlcrawler.crawler.Driver Maven / Gradle / Ivy

package com.antbrains.urlcrawler.crawler;

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options; 
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger;

import com.antbrains.httpclientfetcher.HttpClientFetcher; 
import com.antbrains.urlcrawler.db.CrawlTask;

public class Driver{
	/** Log4j logger for this class. */
	protected static Logger logger=Logger.getLogger(Driver.class);
	/** Default capacity of the task queue, used when the taskQueueSize option is not supplied. */
	private static final int DEF_TASK_QUEUE_SIZE=1000;
	/** Default capacity of the result queue, used when the resQueueSize option is not supplied. */
	private static final int DEF_RES_QUEUE_SIZE=1000;
	/** Default producer batch size, used when the producerBatchSize option is not supplied. */
	private static final int DEF_PRODUCER_BATCH_SIZE=100;
	public static void main(String[] args) throws Exception {
		CommandLineParser parser = new PosixParser();
		Options options = new Options();
		options.addOption("h", "help", false, "print help");
		options.addOption("taskQueueSize", true, "taskQueueSize " + DEF_TASK_QUEUE_SIZE);
		options.addOption("resQueueSize", true, "resQueueSize " + DEF_RES_QUEUE_SIZE);
		options.addOption("producerBatchSize", true, "producerBatchSize " + DEF_PRODUCER_BATCH_SIZE);
		options.addOption("zkPort", true, "zkPort null");
		
		CommandLine line = parser.parse(options, args);
		HelpFormatter formatter = new HelpFormatter();
		String helpStr = "Driver fetcherNumber zkQuorum stopPort dbName conAddr jmxUrl faeClass";
		args = line.getArgs();
		if (args.length !=7) {
			formatter.printHelp(helpStr, options);
			System.exit(-1);
		}
		
		int taskQueueSize=DEF_TASK_QUEUE_SIZE;
		if(line.hasOption("taskQueueSize")){
			taskQueueSize=Integer.valueOf(line.getOptionValue("taskQueueSize"));
		}
		int resQueueSize=DEF_RES_QUEUE_SIZE;
		if(line.hasOption("resQueueSize")){
			resQueueSize=Integer.valueOf(line.getOptionValue("resQueueSize"));
		}
		
		int producerBatchSize=DEF_PRODUCER_BATCH_SIZE;
		if(line.hasOption("producerBatchSize")){
			producerBatchSize=Integer.valueOf(line.getOptionValue("producerBatchSize"));
		}
		String zkPort=null;
		if(line.hasOption("zkPort")){
			zkPort=Integer.valueOf(line.getOptionValue("zkPort")).toString();
		}
		
		int workerNumber=Integer.valueOf(args[0]);
		String zkQuorum=args[1];
		int stopPort=Integer.valueOf(args[2]);
		String dbName=args[3];
		String conAddr=args[4];
		String jmxUrl=args[5];
		String faeClass=args[6];
		//print command args
		logger.info("workerNumber: "+workerNumber);
		logger.info("zkQuorum: "+zkQuorum);
		logger.info("stopPort: " +stopPort);
		logger.info("dbName: "+dbName);
		logger.info("conAddr: "+conAddr);
		logger.info("jmxUrl: "+jmxUrl);
		logger.info("faeClass: "+faeClass);
        
		
		//print options
		logger.info("taskQueueSize: "+taskQueueSize);
		logger.info("resQueueSize: "+resQueueSize);
		logger.info("producerBatchSize: "+producerBatchSize);
		logger.info("zkPort: "+zkPort);
		
		
		BlockingQueue taskQueue=new ArrayBlockingQueue<>(taskQueueSize);
		BlockingQueue resQueue=new ArrayBlockingQueue<>(resQueueSize);
		
		final TaskReceiver receiver = new TaskReceiver(dbName, conAddr, jmxUrl, taskQueue);
		receiver.start();
		
		final Fetcher[] workers=new Fetcher[workerNumber];
		HttpClientFetcher fetcher=new HttpClientFetcher(Driver.class.getSimpleName());
		
		fetcher.init();
		Class cls=Class.forName(faeClass);
		FetcherAndExtractor fae=(FetcherAndExtractor) cls.newInstance();
		for(int i=0;i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy