// us.codecraft.webmagic.scripts.ScriptConsole Maven / Gradle / Ivy
// The newest version!
package us.codecraft.webmagic.scripts;
import org.apache.commons.cli.*;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.utils.WMCollections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* @author [email protected]
* @since 0.4.1
*/
public class ScriptConsole {
private static class Params {
Language language = Language.JavaScript;
String scriptFileName;
List urls;
int thread = 1;
int sleepTime = 1000;
private static Map> alias = new HashMap>();
static {
alias.put(Language.JavaScript, WMCollections.newHashSet("js", "javascript", "JavaScript", "JS"));
alias.put(Language.JRuby, WMCollections.newHashSet("ruby", "jruby", "Ruby", "JRuby"));
}
public void setLanguagefromArg(String arg) {
for (Map.Entry> languageSetEntry : alias.entrySet()) {
if (languageSetEntry.getValue().contains(arg)) {
this.language = languageSetEntry.getKey();
return;
}
}
}
private Language getLanguage() {
return language;
}
private void setLanguage(Language language) {
this.language = language;
}
private String getScriptFileName() {
return scriptFileName;
}
private void setScriptFileName(String scriptFileName) {
this.scriptFileName = scriptFileName;
}
private List getUrls() {
return urls;
}
private void setUrls(List urls) {
this.urls = urls;
}
private int getThread() {
return thread;
}
private void setThread(int thread) {
this.thread = thread;
}
private int getSleepTime() {
return sleepTime;
}
private void setSleepTime(int sleepTime) {
this.sleepTime = sleepTime;
}
}
public static void main(String[] args) {
Params params = parseCommand(args);
startSpider(params);
}
private static void startSpider(Params params) {
ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom()
.language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).thread(params.getThread()).build();
pageProcessor.getSite().setSleepTime(params.getSleepTime());
pageProcessor.getSite().setRetryTimes(3);
pageProcessor.getSite().setAcceptStatCode(WMCollections.newHashSet(200, 404,403, 500,502));
Spider spider = Spider.create(pageProcessor).thread(params.getThread());
spider.clearPipeline().addPipeline(new Pipeline() {
@Override
public void process(ResultItems resultItems, Task task) {
}
});
if (params.getUrls() == null || params.getUrls().size() == 0) {
System.err.println("Need at least one argument");
System.out.println("Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]");
System.exit(-1);
}
for (String url : params.getUrls()) {
spider.addUrl(url);
}
spider.run();
}
private static Params parseCommand(String[] args) {
try {
Options options = new Options();
options.addOption(new Option("l", "language", true, "language"));
options.addOption(new Option("t", "thread", true, "thread"));
options.addOption(new Option("f", "file", true, "script file"));
options.addOption(new Option("i", "input", true, "input file"));
options.addOption(new Option("s", "sleep", true, "sleep time"));
options.addOption(new Option("g", "logger", true, "sleep time"));
CommandLineParser commandLineParser = new PosixParser();
CommandLine commandLine = commandLineParser.parse(options, args);
return readOptions(commandLine);
} catch (Exception e) {
e.printStackTrace();
exit();
return null;
}
}
private static void exit() {
System.err.println("Format error");
System.out.println("Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]");
System.exit(-1);
}
private static Params readOptions(CommandLine commandLine) {
Params params = new Params();
if (commandLine.hasOption("l")) {
String language = commandLine.getOptionValue("l");
params.setLanguagefromArg(language);
}
if (commandLine.hasOption("f")) {
String scriptFilename = commandLine.getOptionValue("f");
params.setScriptFileName(scriptFilename);
} else {
exit();
}
if (commandLine.hasOption("s")) {
Integer sleepTime = Integer.parseInt(commandLine.getOptionValue("s"));
params.setSleepTime(sleepTime);
}
if (commandLine.hasOption("t")) {
Integer thread = Integer.parseInt(commandLine.getOptionValue("t"));
params.setThread(thread);
}
if (commandLine.hasOption("g")) {
configLogger(commandLine.getOptionValue("g"));
}
params.setUrls(commandLine.getArgList());
return params;
}
private static void configLogger(String value) {
Logger rootLogger = Logger.getRootLogger();
if ("debug".equalsIgnoreCase(value)) {
rootLogger.setLevel(Level.DEBUG);
} else if ("info".equalsIgnoreCase(value)) {
rootLogger.setLevel(Level.INFO);
} else if ("warn".equalsIgnoreCase(value)) {
rootLogger.setLevel(Level.WARN);
} else if ("trace".equalsIgnoreCase(value)) {
rootLogger.setLevel(Level.TRACE);
} else if ("off".equalsIgnoreCase(value)) {
rootLogger.setLevel(Level.OFF);
} else if ("error".equalsIgnoreCase(value)) {
rootLogger.setLevel(Level.ERROR);
}
}
}