All downloads are free. Search and download functionality uses the official Maven repository.

us.codecraft.webmagic.model.samples.OschinaBlog Maven / Gradle / Ivy

There is a newer version: 1.0.2
Show newest version
package us.codecraft.webmagic.model.samples;

import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.pipeline.PageModelPipeline;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;

import java.util.List;

/**
 * @author [email protected] 
*/ @TargetUrl("http://my.oschina.net/flashsword/blog/\\d+") public class OschinaBlog{ @ExtractBy("//title") private String title; @ExtractBy(value = "div.BlogContent",type = ExtractBy.Type.Css) private String content; @ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true) private List tags; public static void main(String[] args) { OOSpider.create(Site.me() .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36") .setSleepTime(0) .setRetryTimes(3) ,new PageModelPipeline() { @Override public void process(Object o, Task task) { } }, OschinaBlog.class).thread(10).addUrl("http://my.oschina.net/flashsword/blog").run(); } public String getTitle() { return title; } public String getContent() { return content; } public List getTags() { return tags; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy