// us.codecraft.webmagic.model.samples.JokejiModel - Maven / Gradle / Ivy
package us.codecraft.webmagic.model.samples;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.ConsolePageModelPipeline;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.HelpUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.scheduler.RedisScheduler;
/**
 * Sample annotated page model for the joke site {@code www.jokeji.cn}.
 *
 * <p>The class-level {@link TargetUrl} and {@link HelpUrl} annotations hold the URL patterns
 * for this model, and each field is tagged with the {@link ExtractBy} expression associated
 * with it. {@link #main(String[])} launches a crawl of the site using this model.
 *
 * @author code4crafter@gmail.com
 */
@TargetUrl("http://www.jokeji.cn/jokehtml/jy/\\d+.htm")
@HelpUrl("http://www.jokeji.cn/list\\w+.htm")
public class JokejiModel {

    /** Value selected from the page's {@code <title>} element by the regex in the expression. */
    @ExtractBy("//title/regex('([^_]+)',1)")
    private String title;

    /** Value selected from the {@code div} whose class is {@code mob_txt}, via {@code tidyText()}. */
    @ExtractBy("//div[@class=mob_txt]/tidyText()")
    private String content;

    /**
     * Configures the site, builds a spider for this model and runs it.
     *
     * <p>The spider is seeded with the site's home page, uses two threads, reports results
     * through a {@link ConsolePageModelPipeline}, and is given a {@link RedisScheduler}
     * constructed for {@code 127.0.0.1}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Site-level settings: domain, GBK charset, sleep time, timeout and a browser user agent.
        Site jokejiSite = Site.me()
                .setDomain("www.jokeji.cn")
                .setCharset("gbk")
                .setSleepTime(100)
                .setTimeOut(3000)
                .setUserAgent("Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)");
        ConsolePageModelPipeline consoleOutput = new ConsolePageModelPipeline();

        // The scheduler is constructed inline so that it is still created only after
        // the seed URL and thread count have been applied, as in the original chain.
        OOSpider.create(jokejiSite, consoleOutput, JokejiModel.class)
                .addUrl("http://www.jokeji.cn/")
                .thread(2)
                .scheduler(new RedisScheduler("127.0.0.1"))
                .run();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy