All downloads are free. Search and download functionality uses the official Maven repository.

com.bytegriffin.get4j.sample.HuxiuPageParser Maven / Gradle / Ivy

The newest version!
package com.bytegriffin.get4j.sample;

import com.bytegriffin.get4j.Spider;
import com.bytegriffin.get4j.core.Page;
import com.bytegriffin.get4j.parse.PageParser;

/**
 * Sample page parser for Huxiu (huxiu.com).
 *
 * <p>{@link #parse(Page)} prints a one-line summary of every page it is handed, and
 * {@link #main(String[])} configures and starts a list/detail spider that uses this
 * class as its parser.
 */
public class HuxiuPageParser implements PageParser {

    /**
     * Fetch URL handed to the spider. The <code>{1}</code> token is passed through verbatim;
     * presumably it is a page-number placeholder expanded by get4j (TODO confirm).
     */
    private static final String ARTICLE_LIST_URL =
            "https://www.huxiu.com/v2_action/article_list?page={1}";

    /** Selector expression handed to the spider for locating detail-page links. */
    private static final String DETAIL_LINK_SELECTOR = "$.data|a.transition.msubstr-row2[href]";

    /**
     * Writes the page's title, method and URL to standard error on a single line.
     *
     * @param page the page supplied by the crawler
     */
    @Override
    public void parse(Page page) {
        final String summary =
                page.getTitle() + "   " + page.getMethod() + "  " + page.getUrl() + "  ";
        System.err.println(summary);
    }

    /**
     * Entry point: builds the spider for the Huxiu article-list endpoint and starts it.
     *
     * <p>Original author's note on the endpoint: the JSON response contains HTML.
     *
     * @param args command-line arguments (not used)
     * @throws Exception propagated from the spider builder chain
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): defaultUserAgent() appears twice in this chain; the second call
        // looks redundant, but it is kept so the call sequence stays unchanged - confirm.
        Spider.list_detail()
                .fetchUrl(ARTICLE_LIST_URL)
                .parser(HuxiuPageParser.class)
                .defaultUserAgent()
                .post()
                .sleep(3)
                .defaultUserAgent()
                .detailLinkSelector(DETAIL_LINK_SELECTOR)
                .thread(1)
                .start();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy