All downloads are free. The search and download functionality uses the official Maven repository.

us.codecraft.webmagic.example.OschinaBlog Maven / Gradle / Ivy

The newest version!
package us.codecraft.webmagic.example;

import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.Formatter;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;

import java.util.Date;
import java.util.List;

/**
 * @author [email protected] 
* @since 0.3.2 */ @TargetUrl("http://my.oschina.net/flashsword/blog/\\d+") public class OschinaBlog { @ExtractBy("//title/text()") private String title; @ExtractBy(value = "div.BlogContent", type = ExtractBy.Type.Css) private String content; @ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true) private List tags; @ExtractBy("//div[@class='BlogStat']/regex('\\d+-\\d+-\\d+\\s+\\d+:\\d+')") private Date date; public static void main(String[] args) { //results will be saved to "/data/webmagic/" in json format OOSpider.create(Site.me(), new JsonFilePageModelPipeline("/data/webmagic/"), OschinaBlog.class) .addUrl("http://my.oschina.net/flashsword/blog").run(); } public String getTitle() { return title; } public String getContent() { return content; } public List getTags() { return tags; } public Date getDate() { return date; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy