
// mtons.spider.http.supports.JsoupHttpConnect (Maven / Gradle / Ivy)
package mtons.spider.http.supports;
import org.apache.http.HttpStatus;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import mtons.spider.http.HttpConnect;
import mtons.spider.http.Request;
import mtons.spider.http.Response;
import java.io.IOException;
/**
* Created by langhsu on 2015/11/4.
*/
public class JsoupHttpConnect extends AbstractHttpClient implements HttpConnect {
public JsoupHttpConnect() {
}
public JsoupHttpConnect(int timeout) {
this.timeout = timeout;
}
@Override
public Response send(Request request) throws IOException {
// FIXME: 暂时在连接前强制设置 代理
if (request.getHttpHost() != null) {
System.getProperties().setProperty("proxySet", "true");
System.getProperties().setProperty("http.proxyHost", String.valueOf(request.getHttpHost().getHostName()));
System.getProperties().setProperty("http.proxyPort", String.valueOf(request.getHttpHost().getPort()));
}
Connection connection = Jsoup.connect(request.getUrl()).timeout(timeout);
if (request.getUrl().startsWith("https")) {
connection.validateTLSCertificates(false);
}
injectParameters(connection, request);
connection.ignoreContentType(true);
Document doc;
switch (request.getMethod()) {
case POST:
doc = connection.post();
break;
default:
doc = connection.get();
}
Response response = new Response(request);
if (doc != null) {
response.setDocument(doc);
response.setRaw(doc.html());
response.setStatusCode(HttpStatus.SC_OK);
request.setStatusCode(HttpStatus.SC_OK);
} else {
response.setStatusCode(HttpStatus.SC_BAD_REQUEST);
request.setStatusCode(HttpStatus.SC_BAD_REQUEST);
}
return response;
}
@Override
public void destroy() {
}
/**
* 注入参数
* @param connection
* @param request
*/
private void injectParameters(Connection connection, Request request) {
if (request.getHeader() != null) {
request.getHeader().forEach((k, v) -> connection.header(k, v));
}
// 若没有指定User-Agent则使用随机User-Agent
if (request.getHeader() == null || !request.getHeader().containsKey("User-Agent")) {
connection.header("User-Agent", getUserAgents());
}
// 若没有指定Content-Type
if (request.getHeader() == null || !request.getHeader().containsKey("Content-Type")) {
connection.header("Content-Type", "text/html;charset=" + request.getCharset());
}
connection.header("Connection", "close");
if (!request.getParameters().isEmpty()) {
connection.data(request.getParameters());
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy