All downloads are free. Search and download functionality uses the official Maven repository.

cn.miw.spider.utils.CatJSONP Maven / Gradle / Ivy

There is a newer version: 0.0.2
Show newest version
package cn.miw.spider.utils;

import java.io.IOException;

import org.jsoup.nodes.Document;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

/**
 * Crawl task that walks a paged JSON/JSONP endpoint on a background thread.
 *
 * <p>For each page the {@code {{page}}} placeholder in the URL template is replaced with the
 * current page value, the response text is fetched through {@code Client.JSoupGetDocument},
 * a fixed number of leading/trailing characters (typically the JSONP wrapper) is stripped,
 * the remainder is parsed as a JSON object, and the array found at {@code listSelector} is
 * handed to the callback. When the page value reaches {@code fin} the callback's
 * {@code catFin} is invoked with the number of pages fetched and the number of items parsed.
 *
 * <p>The counters are plain fields updated by the worker thread; calling {@link #start}
 * several times on the same instance makes the workers share (and race on) those counters.
 */
public class CatJSONP implements ICatTask{
	/** Attempts made for one page before it is skipped after repeated I/O failures. */
	private static final int MAX_ATTEMPTS = 3;

	private Object flag = "智联招聘";
	private long page = 0, fin = 40000;
	private int step = 100;
	private int delay = 5000;
	private String url = "https://fe-api.zhaopin.com/c/i/sou?pageSize=100&cityId=831&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kt=3&start={{page}}";
	private String listSelector = "data.results";
	private String attrs = null;// "id:id,number:seq,jobType:jobType,company.name:comp,positionURL:jobUrl,jobName,welfare";

	/** Number of characters dropped from the start / end of every response before parsing. */
	private int prefix=0,endfix=0;

	/** Total number of items parsed so far. */
	private long count = 0;
	/** Number of pages fetched without an exception so far. */
	private long pages = 0;

	/**
	 * Creates a task whose wrapper lengths are taken from sample wrapper strings.
	 *
	 * @param flag         opaque tag passed back unchanged to every callback invocation
	 * @param page         first value substituted for {@code {{page}}}
	 * @param fin          crawling stops once the page value is no longer below this bound
	 * @param step         amount added to the page value after each page
	 * @param delay        pause between requests, in milliseconds
	 * @param url          URL template containing the {@code {{page}}} placeholder
	 * @param listSelector path of the result array inside the parsed JSON object
	 * @param attrs        optional comma separated {@code source[:target]} projection; {@code null}
	 *                     or blank keeps every element as is
	 * @param prefix       text whose length is stripped from the start of each response;
	 *                     {@code null} is treated as empty
	 * @param endfix       text whose length is stripped from the end of each response;
	 *                     {@code null} is treated as empty
	 */
	public CatJSONP(Object flag, long page, long fin, int step, int delay, String url, String listSelector,
			String attrs,String prefix,String endfix) {
		this(flag, page, fin, step, delay, url, listSelector, attrs, lengthOf(prefix), lengthOf(endfix));
	}

	/**
	 * Creates a task with explicit wrapper lengths.
	 *
	 * @param flag         opaque tag passed back unchanged to every callback invocation
	 * @param page         first value substituted for {@code {{page}}}
	 * @param fin          crawling stops once the page value is no longer below this bound
	 * @param step         amount added to the page value after each page
	 * @param delay        pause between requests, in milliseconds
	 * @param url          URL template containing the {@code {{page}}} placeholder
	 * @param listSelector path of the result array inside the parsed JSON object
	 * @param attrs        optional comma separated {@code source[:target]} projection; {@code null}
	 *                     or blank keeps every element as is
	 * @param prefixLength number of leading characters to strip; negative values count as 0
	 * @param endfixLength number of trailing characters to strip; negative values count as 0
	 */
	public CatJSONP(Object flag, long page, long fin, int step, int delay, String url, String listSelector,
			String attrs,int prefixLength,int endfixLength) {
		this.flag = flag;
		this.page = page;
		this.fin = fin;
		this.step = step;
		this.delay = delay;
		this.url = url;
		this.listSelector = listSelector;
		this.attrs = attrs;
		// A negative length would make substring() throw on every page.
		this.prefix = Math.max(0, prefixLength);
		this.endfix = Math.max(0, endfixLength);
	}

	/** Returns the length of {@code s}, treating {@code null} as the empty string. */
	private static int lengthOf(String s) {
		return s == null ? 0 : s.length();
	}

	/**
	 * Starts crawling on a new thread and returns immediately.
	 *
	 * @param callBack receiver of per-page results and of the final summary; may be {@code null}
	 */
	public void start(ICatCallBack callBack) {
		catPage(flag, url, page, step, fin, listSelector, attrs, delay, callBack);
	}

	/** Spawns the worker thread that runs {@link #crawl}. */
	private void catPage(Object flag, String sourceUrl, long startPage, long step, long fin, String listSelector,
			String attrs, int delay, ICatCallBack callBack) {
		new Thread(new Runnable() {

			@Override
			public void run() {
				crawl(flag, sourceUrl, startPage, step, fin, listSelector, attrs, delay, callBack);
			}
		}).start();
	}

	/**
	 * Worker loop: processes at least one page, then continues while the page value is below
	 * {@code fin}. A page that fails with an {@link IOException} is retried (after the normal
	 * delay) up to {@link #MAX_ATTEMPTS} times and then skipped; a page that fails with a
	 * runtime exception (for example malformed JSON) is skipped right away. Interrupting the
	 * thread ends the crawl early. {@code catFin} is always reported.
	 */
	private void crawl(Object flag, String sourceUrl, long startPage, long step, long fin, String listSelector,
			String attrs, int delay, ICatCallBack callBack) {
		long page = startPage;
		int failures = 0;
		do {
			String url = sourceUrl.replace("{{page}}", page + "");
			boolean advance;
			try {
				fetchPage(flag, url, page, listSelector, attrs, callBack);
				pages++;
				advance = true;
			} catch (IOException e) {
				e.printStackTrace();
				failures++;
				// Give up on this page after a bounded number of attempts instead of looping forever.
				advance = failures >= MAX_ATTEMPTS;
			} catch (RuntimeException e) {
				// Bad payload or failing callback: retrying would not help, move on.
				e.printStackTrace();
				advance = true;
			}
			if (advance) {
				page = page + step;
				failures = 0;
			}
			try {
				// Sleep on failures as well so a broken endpoint is not hammered.
				Thread.sleep(delay);
			} catch (InterruptedException e) {
				Thread.currentThread().interrupt();
				break;
			}
		} while (page < fin);
		if (callBack != null)
			callBack.catFin(flag, pages, count);
	}

	/**
	 * Downloads one page, strips the configured wrapper, parses it and notifies the callback.
	 * Responses too short to contain anything besides the wrapper are ignored.
	 *
	 * @throws IOException if the download fails
	 */
	private void fetchPage(Object flag, String url, long page, String listSelector, String attrs,
			ICatCallBack callBack) throws IOException {
		Document doc = Client.JSoupGetDocument(url);
		if (doc == null)
			return;
		String x = doc.text();
		System.out.println(x);
		// Both wrapper parts must fit, otherwise substring() would throw.
		if (x == null || x.length() - prefix - endfix <= 0)
			return;
		x = x.substring(prefix, x.length() - endfix);
		// PreProcess is not defined in this class; presumably an inherited hook from ICatTask.
		x = PreProcess(x);
		JSONArray result = parseList(x, listSelector, attrs);
		if (callBack != null)
			callBack.catAPage(flag, page, result);
	}

	/**
	 * Parses {@code jsonStr}, extracts the array at path {@code list} and optionally projects
	 * every element onto the attributes named in {@code attrs}. Each {@code attrs} entry is
	 * {@code source} or {@code source:target}: the value read from path {@code source} is stored
	 * under key {@code target} (or {@code source} when no target is given). Adds the number of
	 * returned items to the running item count.
	 *
	 * @return the (possibly projected) elements; empty when the text parses to nothing or the
	 *         list is missing
	 */
	private JSONArray parseList(String jsonStr, String list, String attrs) {
		JSONArray result = new JSONArray();
		JSONObject json = JSONObject.parseObject(jsonStr);
		JSONArray data = json == null ? null : JSONKit.getArray(json, list);
		if (data == null)
			return result;

		// Parse the projection once instead of once per element.
		String[] sources = null;
		String[] targets = null;
		if (attrs != null && attrs.trim().length() > 0) {
			String[] keys = attrs.split(",");
			sources = new String[keys.length];
			targets = new String[keys.length];
			for (int i = 0; i < keys.length; i++) {
				String source = keys[i].trim();
				String target = source;
				if (keys[i].contains(":")) {
					String[] parts = keys[i].split(":");
					source = parts.length > 0 ? parts[0].trim() : "";
					// "name:" has no target part; fall back to the source name.
					target = parts.length > 1 ? parts[1].trim() : source;
				}
				sources[i] = source;
				targets[i] = target;
			}
		}

		for (Object d : data) {
			JSONObject j = (JSONObject) d;
			JSONObject item = j;
			if (sources != null) {
				item = new JSONObject();
				for (int i = 0; i < sources.length; i++) {
					item.put(targets[i], JSONKit.get(j, sources[i]));
				}
			}
			result.add(item);
		}
		count += result.size();
		return result;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy