All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.xbynet.crawler.http.DefaultDownloader Maven / Gradle / Ivy

package com.github.xbynet.crawler.http;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpUriRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.github.xbynet.crawler.Const;
import com.github.xbynet.crawler.Request;
import com.github.xbynet.crawler.Response;
import com.github.xbynet.crawler.Site;

public class DefaultDownloader extends AbsDownloader {
	private final Logger log = LoggerFactory.getLogger(DefaultDownloader.class);

	@Override
	public void download(Request request){
		super.doDownload(request);
	}
	@Override
	protected void process(HttpUriRequest httpUriRequest,
			CloseableHttpResponse resp, Request request, Site site,Response response,
			Object... extras) {
		if (resp == null) {
			log.error(request.getUrl() + "请求失败");
			return ;
		}
		response.setCode(resp.getStatusLine().getStatusCode());
		response.setContentType(resp.getFirstHeader("Content-Type").getValue());
		Const.ResponseType type = null;
		try {
			if (response.getContentType().contains("text")
					|| response.getContentType().contains("json")) {
				type = Const.ResponseType.TEXT;
				String raw=IOUtils.toString(resp.getEntity().getContent(),
						request.getEncoding() != null ? request.getEncoding()
								: site.getEncoding());
				response.setRaw(raw);
			} else {
				type = Const.ResponseType.BIN;
				response.setBytes(IOUtils.toByteArray(resp.getEntity()
						.getContent()));
			}
		} catch (UnsupportedOperationException e) {
			log.error("", e);
		} catch (IOException e) {
			log.error("", e);
		}
		response.setRespType(type);
		response.setRequest(request);
		
		Map> headers=new HashMap>();
		for(Header header:resp.getAllHeaders()){
			List value=new ArrayList();
			HeaderElement[] hes=header.getElements();
			if(hes!=null && hes.length>1){
				for(HeaderElement e:hes){
					value.add(e.getValue());
				}
			}else{
				value.add(header.getValue());
			}
			headers.put(header.getName(), value);
		}
		response.setHeaders(headers);
		try {
			getSpider().getProcessor().process(response);
		} catch (Exception e) {
			log.error("",e);
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy