All Downloads are FREE. Search and download functionalities are using the official Maven repository.

mtons.spider.http.supports.JsoupHttpConnect Maven / Gradle / Ivy

package mtons.spider.http.supports;

import org.apache.http.HttpStatus;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import mtons.spider.http.HttpConnect;
import mtons.spider.http.Request;
import mtons.spider.http.Response;

import java.io.IOException;

/**
 * {@link HttpConnect} implementation that executes a {@link Request} through Jsoup and
 * wraps the parsed {@link Document} in a {@link Response}.
 *
 * <p>NOTE(review): when the request carries a proxy host, the proxy is configured through
 * JVM-wide system properties (see {@link #applyProxy(Request)}), so it is shared by every
 * request in the process and is never reset by this class.
 *
 * Created by langhsu on 2015/11/4.
 */
public class JsoupHttpConnect extends AbstractHttpClient implements HttpConnect {

    private static final String HEADER_USER_AGENT = "User-Agent";
    private static final String HEADER_CONTENT_TYPE = "Content-Type";

    /**
     * Creates a connector that keeps whatever timeout the superclass provides.
     */
    public JsoupHttpConnect() {
    }

    /**
     * Creates a connector with an explicit timeout.
     *
     * @param timeout value handed to Jsoup's {@code Connection.timeout(int)}
     *                (milliseconds according to the Jsoup documentation)
     */
    public JsoupHttpConnect(int timeout) {
        this.timeout = timeout;
    }

    /**
     * Sends the request with Jsoup (POST when the request method is {@code POST}, GET for
     * every other method) and returns the parsed result.
     *
     * <p>The status code is written to both the returned response and the given request:
     * {@code 200} when a document was obtained, {@code 400} otherwise. The real HTTP status
     * is not propagated.
     *
     * @param request the request to execute; its status code is updated as a side effect
     * @return a response bound to {@code request}, carrying the document and its HTML
     * @throws IOException if Jsoup fails to execute the request
     */
    @Override
    public Response send(Request request) throws IOException {
        applyProxy(request);

        Connection connection = Jsoup.connect(request.getUrl()).timeout(timeout);

        // Certificate validation is switched off for https URLs, so any certificate is accepted.
        if (request.getUrl().startsWith("https")) {
            connection.validateTLSCertificates(false);
        }

        injectParameters(connection, request);

        // Let Jsoup parse the body regardless of the response Content-Type.
        connection.ignoreContentType(true);

        Document doc;
        switch (request.getMethod()) {
            case POST:
                doc = connection.post();
                break;
            default:
                doc = connection.get();
        }

        Response response = new Response(request);

        if (doc != null) {
            response.setDocument(doc);
            response.setRaw(doc.html());
        }

        // Compute the status once so the response and the request can never disagree.
        int statusCode = (doc != null) ? HttpStatus.SC_OK : HttpStatus.SC_BAD_REQUEST;
        response.setStatusCode(statusCode);
        request.setStatusCode(statusCode);
        return response;
    }

    /**
     * No resources are held by this connector, so there is nothing to release.
     */
    @Override
    public void destroy() {

    }

    /**
     * Configures the proxy requested by {@code request}, if any.
     *
     * <p>FIXME: temporarily forces the proxy through system properties right before
     * connecting. These properties are global to the JVM: they also affect concurrent and
     * later requests, and are not cleared when a request has no proxy host.
     *
     * @param request the request whose proxy host (may be {@code null}) is applied
     */
    private void applyProxy(Request request) {
        if (request.getHttpHost() == null) {
            return;
        }
        System.setProperty("proxySet", "true");
        System.setProperty("http.proxyHost", String.valueOf(request.getHttpHost().getHostName()));
        System.setProperty("http.proxyPort", String.valueOf(request.getHttpHost().getPort()));
    }

    /**
     * Copies the request's headers and parameters onto the Jsoup connection and fills in
     * defaults for headers the caller did not provide.
     *
     * @param connection the Jsoup connection being prepared
     * @param request    the source of headers, charset and parameters
     */
    private void injectParameters(Connection connection, Request request) {
        if (request.getHeader() != null) {
            request.getHeader().forEach((k, v) -> connection.header(k, v));
        }

        // Fall back to the User-Agent supplied by the superclass when the caller gave none
        // (the original comment describes it as a random User-Agent).
        if (!hasHeader(request, HEADER_USER_AGENT)) {
            connection.header(HEADER_USER_AGENT, getUserAgents());
        }

        // Default Content-Type when the caller gave none.
        if (!hasHeader(request, HEADER_CONTENT_TYPE)) {
            connection.header(HEADER_CONTENT_TYPE, "text/html;charset=" + request.getCharset());
        }

        connection.header("Connection", "close");

        // Guard against a missing parameter collection, mirroring the null check on headers.
        if (request.getParameters() != null && !request.getParameters().isEmpty()) {
            connection.data(request.getParameters());
        }
    }

    /**
     * Tells whether the request declares the given header, ignoring case.
     *
     * <p>HTTP header names are case-insensitive, so a caller-supplied {@code user-agent}
     * must count as present; otherwise a default would be applied on top of it.
     *
     * @param request the request whose headers (may be {@code null}) are inspected
     * @param name    the header name to look for
     * @return {@code true} if a header with that name exists in any letter case
     */
    private static boolean hasHeader(Request request, String name) {
        return request.getHeader() != null
                && request.getHeader().keySet().stream().anyMatch(name::equalsIgnoreCase);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy