All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.bytegriffin.get4j.sample.CsdnBlogPageParser Maven / Gradle / Ivy

The newest version!
package com.bytegriffin.get4j.sample;

import com.bytegriffin.get4j.Spider;
import com.bytegriffin.get4j.core.Page;
import com.bytegriffin.get4j.parse.PageParser;

/**
 * Sample {@link PageParser} that crawls the CSDN blog article feed in
 * list/detail mode and dumps each fetched page to standard error.
 */
public class CsdnBlogPageParser implements PageParser {

    /** Entry URL handed to the spider: the CSDN blog article feed API. */
    private static final String ARTICLE_FEED_URL =
            "https://blog.csdn.net/api/articles?type=more&category=home&shown_offset=ad1531325255862708";

    /**
     * Prints one line per page to {@code System.err}.
     *
     * <p>Per the original author's note, the first page delivered is the list
     * page and the following ones are detail pages. A list page is printed as
     * its URL followed by the result of the {@code $..channel} JSON query; a
     * detail page is printed as its URL only.
     *
     * @param page the page handed over by the crawler
     */
    @Override
    public void parse(Page page) {
        // Detail pages: only the URL (plus the two-space separator) is printed.
        if (!page.isListPage()) {
            System.err.println(page.getUrl() + "  ");
            return;
        }

        // List page: URL followed by the "$..channel" query result.
        String listLine = page.getUrl() + "  " + page.json("$..channel");
        System.err.println(listLine);
    }

    /**
     * Builds a list/detail spider for the CSDN article feed, registers this
     * class as its parser and starts it with a single thread.
     *
     * @param args command-line arguments (unused)
     * @throws Exception propagated from the spider setup or start-up
     */
    public static void main(String[] args) throws Exception {
        Spider.list_detail()
                .fetchUrl(ARTICLE_FEED_URL)
                .detailLinkSelector("$.articles.url")
                .parser(CsdnBlogPageParser.class)
                .defaultUserAgent()
                // NOTE(review): presumably watches "$.articles.id" for changes with an
                // interval of 10 -- confirm semantics and unit against the get4j docs.
                .probe("$.articles.id", 10)
                // Optional storage back ends, disabled in this sample:
                //.jdbc("jdbc:mysql://localhost:3306/get4j?useSSL=false&serverTimezone=UTC&characterEncoding=UTF-8&user=root&password=root")
                //.mongodb("mongodb://localhost:27017")
                .thread(1)
                .start();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy