// cn.xuqiudong.common.base.craw.CrawlConnect Maven / Gradle / Ivy
package cn.xuqiudong.common.base.craw;
import org.jsoup.Connection;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
import java.util.Map;
/**
 * Fluent HTTP request helper built on top of jsoup's {@link Connection}.
 *
 * <p>Wraps a single mutable {@link Connection} and exposes chainable setters for
 * URL, cookies, headers, request data, timeout etc., plus helpers to execute the
 * request as GET/POST and to download the response body to a local file.
 *
 * <p>NOTE(review): instances are not thread-safe — each one wraps a single
 * mutable jsoup connection.
 *
 * @author VIC
 */
public class CrawlConnect {

    public static final String CONTENT_TYPE_KEY = "Content-Type";
    public static final String ACCEPT_KEY = "Accept";
    public static final String CONTENT_TYPE_VALUE_JSON = "application/json;charset=UTF-8";

    /** Shared logger; {@code static final} per SLF4J convention (was a per-instance field). */
    private static final Logger LOGGER = LoggerFactory.getLogger(CrawlConnect.class);

    /** Underlying jsoup connection that every fluent setter delegates to. */
    private Connection connection;

    /**
     * Static factory: builds a {@code CrawlConnect} for the given URL.
     *
     * @param url the URL to request
     * @return a new CrawlConnect wrapping a fresh jsoup connection
     */
    public static CrawlConnect build(String url) {
        return new CrawlConnect(url);
    }

    /**
     * Disables TLS certificate validation by installing a trust-all socket factory.
     *
     * <p>jsoup 1.12 removed {@code Connection#validateTLSCertificates}, so this is the
     * workaround described at
     * https://stackoverflow.com/questions/7744075/how-to-connect-via-https-using-jsoup
     * — admittedly not a good approach.
     *
     * <p><b>Security warning:</b> this makes the connection vulnerable to
     * man-in-the-middle attacks; use only when the target site's certificate
     * genuinely cannot be validated.
     *
     * @return this, for chaining
     */
    public CrawlConnect validateTlsCertificates() {
        connection.sslSocketFactory(socketFactory());
        return this;
    }

    /**
     * Wraps an already-configured jsoup connection.
     *
     * @param connection the connection to delegate to
     */
    public CrawlConnect(Connection connection) {
        this.connection = connection;
    }

    /**
     * Builds the trust-all SSL socket factory used by {@link #validateTlsCertificates()}.
     *
     * @return a socket factory that accepts any certificate chain
     * @throws IllegalStateException if the TLS context cannot be initialised
     */
    private SSLSocketFactory socketFactory() {
        TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {
            @Override
            public X509Certificate[] getAcceptedIssuers() {
                // Empty array rather than null, as the X509TrustManager contract recommends.
                return new X509Certificate[0];
            }

            @Override
            public void checkClientTrusted(X509Certificate[] certs, String authType) {
                // deliberately empty: trust every client certificate
            }

            @Override
            public void checkServerTrusted(X509Certificate[] certs, String authType) {
                // deliberately empty: trust every server certificate
            }
        }};
        try {
            SSLContext sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, trustAllCerts, new java.security.SecureRandom());
            return sslContext.getSocketFactory();
        } catch (NoSuchAlgorithmException | KeyManagementException e) {
            // Preserve the original cause instead of discarding it (was a bare RuntimeException
            // without the cause attached; IllegalStateException is a RuntimeException subclass,
            // so existing catch clauses still work).
            throw new IllegalStateException("Failed to create a SSL socket factory", e);
        }
    }

    /**
     * Creates a connection to {@code url} that accepts any content type, with a
     * 30-second timeout.
     *
     * @param url url to request
     */
    public CrawlConnect(String url) {
        this.connection = Jsoup.connect(url).ignoreContentType(true).timeout(30000);
    }

    /**
     * Re-targets the connection at a new URL.
     *
     * @param url the new request URL
     * @return this, for chaining
     */
    public CrawlConnect url(String url) {
        connection.url(url);
        return this;
    }

    /**
     * Sets the request timeout.
     *
     * @param millis timeout in milliseconds (0 means infinite, per jsoup)
     * @return this, for chaining
     */
    public CrawlConnect setTimeout(int millis) {
        this.connection.timeout(millis);
        return this;
    }

    /**
     * Re-targets the connection at a new URL.
     *
     * @param url the new request URL
     * @return this, for chaining
     */
    public CrawlConnect url(URL url) {
        connection.url(url);
        return this;
    }

    /**
     * Adds a single request cookie.
     *
     * @param name  cookie name
     * @param value cookie value
     * @return this, for chaining
     */
    public CrawlConnect cookie(String name, String value) {
        connection.cookie(name, value);
        return this;
    }

    /**
     * Adds all of the given cookies to the request.
     * (Was a raw {@code Map}; jsoup's {@code Connection#cookies} requires
     * {@code Map<String, String>}.)
     *
     * @param cookies cookie name/value pairs
     * @return this, for chaining
     */
    public CrawlConnect cookie(Map<String, String> cookies) {
        connection.cookies(cookies);
        return this;
    }

    /**
     * Adds request data as alternating key/value strings.
     *
     * @param keyvals key, value, key, value, ...
     * @return this, for chaining
     */
    public CrawlConnect data(String... keyvals) {
        connection.data(keyvals);
        return this;
    }

    /**
     * Adds a single request data key/value pair.
     *
     * @param key   parameter name
     * @param value parameter value
     * @return this, for chaining
     */
    public CrawlConnect data(String key, String value) {
        connection.data(key, value);
        return this;
    }

    /**
     * Adds an input-stream upload as multipart data (jsoup 1.8.x+ feature).
     *
     * @param key      form field name
     * @param filename file name reported to the server
     * @param in       stream supplying the file content; caller remains responsible
     *                 for closing it
     * @return this, for chaining
     */
    public CrawlConnect data(String key, String filename, InputStream in) {
        connection.data(key, filename, in);
        return this;
    }

    /**
     * Sets the raw request body (e.g. a JSON payload).
     *
     * @param body the request body text
     * @return this, for chaining
     */
    public CrawlConnect requestBody(String body) {
        connection.requestBody(body);
        return this;
    }

    /**
     * Executes the request as configured.
     *
     * @return the jsoup response
     * @throws IOException on network or protocol error
     */
    public Response execute() throws IOException {
        return connection.execute();
    }

    /**
     * Configures whether HTTP redirects are followed.
     *
     * @param followRedirects true to follow redirects
     * @return this, for chaining
     */
    public CrawlConnect followRedirects(boolean followRedirects) {
        connection.followRedirects(followRedirects);
        return this;
    }

    /**
     * Executes the request as a GET and parses the response.
     *
     * @return the parsed document
     * @throws IOException on network or protocol error
     */
    public Document getDocument() throws IOException {
        return connection.get();
    }

    /**
     * Executes a GET and returns the response as an HTML string.
     *
     * @throws IOException on network or protocol error
     */
    public String getHtml() throws IOException {
        return this.getDocument().html();
    }

    /**
     * Executes a GET and returns the text content of the response body.
     *
     * @throws IOException on network or protocol error
     */
    public String getBodyText() throws IOException {
        return this.getDocument().body().text();
    }

    /**
     * Sets a single request header.
     *
     * @param key   header name
     * @param value header value
     * @return this, for chaining
     */
    public CrawlConnect header(String key, String value) {
        connection.header(key, value);
        return this;
    }

    /**
     * Adds all of the given request headers; a null map is ignored.
     * (Was a raw {@code Map}; jsoup's {@code Connection#headers} requires
     * {@code Map<String, String>}.)
     *
     * @param headers header name/value pairs, may be null
     * @return this, for chaining
     */
    public CrawlConnect header(Map<String, String> headers) {
        if (headers != null) {
            connection.headers(headers);
        }
        return this;
    }

    /**
     * Sets {@code Content-Type} and {@code Accept} headers for a UTF-8 JSON exchange.
     *
     * @return this, for chaining
     */
    public CrawlConnect requestJson() {
        connection.header(CONTENT_TYPE_KEY, CONTENT_TYPE_VALUE_JSON);
        connection.header(ACCEPT_KEY, CONTENT_TYPE_VALUE_JSON);
        return this;
    }

    /**
     * Limits the size of the response body that will be read.
     *
     * @param bytes maximum body size in bytes (0 for unlimited, per jsoup)
     * @return this, for chaining
     */
    public CrawlConnect maxBodySize(int bytes) {
        connection.maxBodySize(bytes);
        return this;
    }

    /**
     * Sets the HTTP method for the request.
     *
     * @param method the method (GET, POST, ...)
     * @return this, for chaining
     */
    public CrawlConnect method(Connection.Method method) {
        connection.method(method);
        return this;
    }

    /**
     * Provides an alternate parser to use when parsing the response to a Document.
     *
     * @param parser the jsoup parser to use
     * @return this, for chaining
     */
    public CrawlConnect parser(Parser parser) {
        connection.parser(parser);
        return this;
    }

    /**
     * Executes the request as a POST and parses the response.
     *
     * @return the parsed document
     * @throws IOException on network or protocol error
     */
    public Document postDocument() throws IOException {
        return connection.post();
    }

    /**
     * Executes a POST and returns the response as an HTML string.
     *
     * @throws IOException on network or protocol error
     */
    public String postHtml() throws IOException {
        return this.postDocument().html();
    }

    /**
     * Executes a POST and returns the text content of the response body.
     *
     * @throws IOException on network or protocol error
     */
    public String postBodyText() throws IOException {
        return this.postDocument().body().text();
    }

    /**
     * Sets the default post data character set for x-www-form-urlencoded post data.
     *
     * @param charset charset name, e.g. "UTF-8"
     * @return this, for chaining
     */
    public CrawlConnect postDataCharset(String charset) {
        connection.postDataCharset(charset);
        return this;
    }

    /**
     * @return the underlying jsoup request object, for settings not exposed here
     */
    public Connection.Request request() {
        return connection.request();
    }

    /**
     * @return the response of the last executed request (null until executed, per jsoup)
     */
    public Response response() {
        return connection.response();
    }

    /**
     * Executes the request and writes the response body to {@code path/fileName}.
     * Missing parent directories are created.
     *
     * @param path     target directory
     * @param fileName target file name
     * @throws IOException if the request or the file write fails
     */
    public void downFile(String path, String fileName) throws IOException {
        LOGGER.info("下载文件 到本地{}{}", path, fileName);
        Response response = this.execute();
        File file = getFileByPathAndName(path, fileName);
        // try-with-resources guarantees the stream is closed even if write() throws
        // (the original leaked the FileOutputStream on failure).
        try (FileOutputStream out = new FileOutputStream(file)) {
            out.write(response.bodyAsBytes());
        }
    }

    /**
     * Resolves {@code path/fileName} to a File, creating the directory tree if needed.
     *
     * @param path     target directory
     * @param fileName target file name
     * @return the resolved file handle (the file itself is not created here)
     */
    private File getFileByPathAndName(String path, String fileName) {
        File dir = new File(path);
        if (!dir.exists()) {
            dir.mkdirs();
        }
        return new File(path, fileName);
    }
}