All downloads are FREE. The search and download functionality uses the official Maven repository.

com.google.refine.util.HttpClient Maven / Gradle / Ivy

Go to download

OpenRefine is a free, open source power tool for working with messy data and improving it

There is a newer version: 3.8.7
Show newest version
/*******************************************************************************
 * Copyright (C) 2018, OpenRefine contributors
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************/

package com.google.refine.util;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

import org.apache.hc.client5.http.ClientProtocolException;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.entity.UrlEncodedFormEntity;
import org.apache.hc.client5.http.impl.DefaultHttpRequestRetryStrategy;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
import org.apache.hc.client5.http.impl.routing.DefaultProxyRoutePlanner;
import org.apache.hc.core5.http.ClassicHttpResponse;
import org.apache.hc.core5.http.EntityDetails;
import org.apache.hc.core5.http.Header;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpException;
import org.apache.hc.core5.http.HttpHost;
import org.apache.hc.core5.http.HttpRequest;
import org.apache.hc.core5.http.HttpRequestInterceptor;
import org.apache.hc.core5.http.HttpResponse;
import org.apache.hc.core5.http.HttpStatus;
import org.apache.hc.core5.http.NameValuePair;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.HttpClientResponseHandler;
import org.apache.hc.core5.http.io.SocketConfig;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.message.BasicNameValuePair;
import org.apache.hc.core5.http.protocol.HttpContext;
import org.apache.hc.core5.util.TimeValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.refine.RefineServlet;

public class HttpClient {

    // Logger shared by all instances, registered under the name "http-client"
    static final Logger logger = LoggerFactory.getLogger("http-client");

    // Connect / connection-request timeouts, handed to the client builder and to each GET request
    private final RequestConfig defaultRequestConfig;
    // Builder the client is created from; configured once in the constructor
    private HttpClientBuilder httpClientBuilder;
    // The client that actually executes the requests
    private CloseableHttpClient httpClient;

    // spacing enforced between successive requests by the request interceptor, in ms
    private int _delay;
    // delay between original request and first retry, in ms
    private int _retryInterval;

    // Proxy settings, read from the http.proxyHost / http.proxyPort / http.nonProxyHosts system properties
    private String proxyHost;
    private int proxyPort;
    private HttpHost proxy;
    private Pattern nonProxyHosts;
    private DefaultProxyRoutePlanner routePlanner;

    /**
     * Creates a client by delegating to {@link #HttpClient(int)} with a delay of 0 ms between requests.
     */
    public HttpClient() {
        this(0);
    }

    /**
     * Creates a client with the given delay between requests. The retry interval passed on to
     * {@link #HttpClient(int, int)} is the larger of {@code delay} and 200 ms.
     *
     * @param delay
     *            spacing to enforce between successive requests, in ms
     */
    public HttpClient(int delay) {
        this(delay, delay > 200 ? delay : 200);
    }

    /**
     * Creates a client with request throttling, retries with exponential backoff and optional proxy support.
     * <p>
     * Socket, connect and connection-request timeouts are all set to 60 seconds. If the system property
     * {@code java.net.useSystemProxies} is {@code "true"}, the client is built from system properties and the
     * explicit {@code http.proxyHost} / {@code http.proxyPort} / {@code http.nonProxyHosts} handling is skipped.
     *
     * @param delay
     *            spacing to enforce between successive requests, in ms
     * @param retryInterval
     *            delay between the original request and the first retry, in ms
     * @throws NumberFormatException
     *             if the {@code http.proxyPort} system property is set but is not a valid integer
     */
    public HttpClient(int delay, int retryInterval) {
        _delay = delay;
        _retryInterval = retryInterval;
        // Create a connection manager with a custom socket timeout
        PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager();
        final SocketConfig socketConfig = SocketConfig.custom()
                .setSoTimeout(60, TimeUnit.SECONDS)
                .build();
        connManager.setDefaultSocketConfig(socketConfig);

        defaultRequestConfig = RequestConfig.custom()
                .setConnectTimeout(60, TimeUnit.SECONDS)
                .setConnectionRequestTimeout(60, TimeUnit.SECONDS)
                .build();

        httpClientBuilder = HttpClients.custom()
                .setUserAgent(RefineServlet.getUserAgent())
                .setDefaultRequestConfig(defaultRequestConfig)
                .setConnectionManager(connManager)
                // Default Apache HC retry is 1x @1 sec (or the value in Retry-Header)
                .setRetryStrategy(new ExponentialBackoffRetryStrategy(3, TimeValue.ofMilliseconds(_retryInterval)))
//                .setRedirectStrategy(new LaxRedirectStrategy()) // TODO: No longer needed since default doesn't exclude POST?
//               .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
                .addRequestInterceptorFirst(new HttpRequestInterceptor() {

                    // Earliest time (epoch ms) at which the next request may proceed
                    private long nextRequestTime = System.currentTimeMillis();

                    @Override
                    public void process(
                            final HttpRequest request,
                            final EntityDetails entity,
                            final HttpContext context) throws HttpException, IOException {

                        long delay = nextRequestTime - System.currentTimeMillis();
                        if (delay > 0) {
                            try {
                                Thread.sleep(delay);
                            } catch (InterruptedException e) {
                                // Do not swallow the interruption: restore the flag so that the
                                // calling thread can still observe that it was interrupted
                                Thread.currentThread().interrupt();
                            }
                        }
                        nextRequestTime = System.currentTimeMillis() + _delay;

                    }
                });

        // Should we use the system defined proxy
        if ("true".equals(System.getProperty("java.net.useSystemProxies"))) {
            logger.info("Use system defined proxy for http connections");
            httpClient = httpClientBuilder.useSystemProperties().build();
            return;
        }

        // Is a proxy defined
        proxyHost = System.getProperty("http.proxyHost");
        proxyPort = Integer.parseInt(System.getProperty("http.proxyPort", "0"));
        if (proxyHost != null && proxyPort != 0) {
            proxy = new HttpHost("http", proxyHost, proxyPort);
            logger.info("Use provided proxy {} for http connections", proxy);
            String strNonProxyHosts = System.getProperty("http.nonProxyHosts");
            nonProxyHosts = fromHostsToPattern(strNonProxyHosts);
            if (nonProxyHosts != null) {
                logger.info("except for hosts matching {}", strNonProxyHosts);
            }
            // Manage nonProxyHosts: hosts matching the pattern are reached directly, all others via the proxy
            routePlanner = new DefaultProxyRoutePlanner(proxy) {

                @Override
                protected HttpHost determineProxy(HttpHost target, HttpContext context) throws HttpException {
                    String host = target.getHostName();
                    if (nonProxyHosts != null && nonProxyHosts.matcher(host).matches()) {
                        return null;
                    }
                    return proxy;
                }
            };
            httpClientBuilder.setRoutePlanner(routePlanner);
        }

        // TODO: Placeholder for future Basic Auth implementation
//        String userinfo = url.getUserInfo();
//        // HTTPS only - no sending password in the clear over HTTP
//        if ("https".equals(url.getProtocol()) && userinfo != null) {
//            int s = userinfo.indexOf(':');
//            if (s > 0) {
//                String user = userinfo.substring(0, s);
//                String pw = userinfo.substring(s + 1, userinfo.length());
//                CredentialsProvider credsProvider = new BasicCredentialsProvider();
//                credsProvider.setCredentials(new AuthScope(url.getHost(), 443),
//                        new UsernamePasswordCredentials(user, pw.toCharArray()));
//                httpClientBuilder = httpClientBuilder.setDefaultCredentialsProvider(credsProvider);
//            }
//        }

        httpClient = httpClientBuilder.build();
    }

    /**
     * Converts a {@code |}-separated list of host globs (the format of the {@code http.nonProxyHosts} system
     * property) into a single regular expression.
     * <p>
     * A {@code *} is treated as a wildcard only when it is the first and/or last character of an entry; the rest
     * of the entry is quoted with {@link Pattern#quote(String)} so that it is matched literally. An entry
     * consisting of a single {@code *} matches every host.
     *
     * @param hostsList
     *            the list of host globs, or {@code null}
     * @return a pattern matching any of the listed hosts, or {@code null} if {@code hostsList} is {@code null}
     */
    protected static Pattern fromHostsToPattern(final String hostsList) {
        if (hostsList == null) {
            return null;
        }
        String[] hosts = hostsList.split("\\|");
        String[] rHosts = new String[hosts.length];
        // Transform glob to regex using Pattern.quote() to avoid regex injections
        for (int i = 0; i < hosts.length; i++) {
            String p = hosts[i];
            boolean leadingWildcard = p.startsWith("*");
            // A lone "*" is one wildcard, not a leading and a trailing one sharing the same character
            boolean trailingWildcard = p.length() > 1 && p.endsWith("*");
            int from = leadingWildcard ? 1 : 0;
            int to = trailingWildcard ? p.length() - 1 : p.length();
            rHosts[i] = (leadingWildcard ? ".*" : "")
                    + Pattern.quote(p.substring(from, to))
                    + (trailingWildcard ? ".*" : "");
        }
        return Pattern.compile(String.join("|", rHosts));
    }

    /**
     * Performs a GET request and returns the response body as a string.
     *
     * @param urlString
     *            the URL to fetch
     * @param headers
     *            request headers to send; may be {@code null} or empty
     * @return the response body
     * @throws IOException
     *             if the response has no entity, or (as a {@link ClientProtocolException}) if the status code is
     *             outside the success range or the body cannot be parsed; also anything thrown by
     *             {@link #getResponse}
     */
    public String getAsString(String urlString, Header[] headers) throws IOException {

        final HttpClientResponseHandler<String> responseHandler = new HttpClientResponseHandler<String>() {

            @Override
            public String handleResponse(final ClassicHttpResponse response) throws IOException {
                final int status = response.getCode();
                // Anything outside [SC_SUCCESS, SC_REDIRECTION) is reported as an error
                if (status < HttpStatus.SC_SUCCESS || status >= HttpStatus.SC_REDIRECTION) {
                    throw new ClientProtocolException(String.format("HTTP error %d : %s for URL %s", status,
                            response.getReasonPhrase(), urlString));
                }
                final HttpEntity entity = response.getEntity();
                if (entity == null) {
                    throw new IOException("No content found in " + urlString);
                }
                try {
                    return EntityUtils.toString(entity);
                } catch (final ParseException ex) {
                    // Surface parse failures as an IOException subtype so callers handle a single exception family
                    throw new ClientProtocolException(ex);
                }
            }
        };

        return getResponse(urlString, headers, responseHandler);
    }

    /**
     * Performs a GET request and lets the supplied handler turn the response into a string.
     *
     * @param urlString
     *            the URL to fetch
     * @param headers
     *            request headers to send; ignored when {@code null} or empty
     * @param responseHandler
     *            handler that converts the HTTP response into the returned string
     * @return whatever {@code responseHandler} produces
     * @throws IOException
     *             if executing the request or handling the response fails
     */
    public String getResponse(String urlString, Header[] headers, HttpClientResponseHandler<String> responseHandler)
            throws IOException {

        HttpGet httpGet = new HttpGet(urlString);

        if (headers != null && headers.length > 0) {
            httpGet.setHeaders(headers);
        }
        httpGet.setConfig(defaultRequestConfig); // FIXME: Redundant? already included in client builder
        // The handler is typed <String> so that execute() returns a String rather than an erased Object
        return httpClient.execute(httpGet, responseHandler);
    }

    /**
     * Sends a POST request whose body is a single URL-encoded form field (encoded as UTF-8) and returns the
     * response body as a string.
     *
     * @param serviceUrl
     *            the URL to post to
     * @param name
     *            the form field name
     * @param value
     *            the form field value
     * @return the response body
     * @throws IOException
     *             if the request fails, the status code is 400 or above, or the response carries no entity
     */
    public String postNameValue(String serviceUrl, String name, String value) throws IOException {
        HttpPost request = new HttpPost(serviceUrl);
        List<NameValuePair> body = Collections.singletonList(
                new BasicNameValuePair(name, value));
        request.setEntity(new UrlEncodedFormEntity(body, StandardCharsets.UTF_8));

        try (CloseableHttpResponse response = httpClient.execute(request)) {
            int statusCode = response.getCode();
            if (statusCode >= 400) { // We should never see 3xx since they get handled automatically
                throw new IOException(String.format("HTTP error %d : %s for URL %s", statusCode,
                        response.getReasonPhrase(), request.getRequestUri()));
            }

            // A response without a body would otherwise surface as a NullPointerException
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("No content found in " + serviceUrl);
            }
            return ParsingUtilities.inputStreamToString(entity.getContent());
        }
    }

    /**
     * Retry strategy that replaces the fixed default retry interval with a binary exponential backoff. An
     * interval reported by the superclass that differs from the configured default (taken to come from a
     * server-provided Retry-After) is used unchanged.
     */
    class ExponentialBackoffRetryStrategy extends DefaultHttpRequestRetryStrategy {

        // The interval configured at construction time; doubles as the "no Retry-After" marker
        private final TimeValue defaultInterval;

        public ExponentialBackoffRetryStrategy(final int maxRetries, final TimeValue defaultRetryInterval) {
            super(maxRetries, defaultRetryInterval);
            this.defaultInterval = defaultRetryInterval;
        }

        /**
         * Returns the superclass interval, unless it equals the default interval, in which case the default
         * duration is multiplied by 2^(execCount - 1).
         */
        @Override
        public TimeValue getRetryInterval(HttpResponse response, int execCount, HttpContext context) {
            final TimeValue suggested = super.getRetryInterval(response, execCount, context);
            final TimeValue chosen;
            if (suggested.compareTo(defaultInterval) == 0) {
                // Same as the default, so assume there was no Retry-After and back off exponentially
                final double scaledDuration = Math.pow(2, execCount - 1) * defaultInterval.getDuration();
                chosen = TimeValue.of((long) scaledDuration, defaultInterval.getTimeUnit());
            } else {
                chosen = suggested;
            }
            logger.warn("Retrying HTTP request after " + chosen.toString());
            return chosen;
        }

        /**
         * Every request, POST included, is declared idempotent so that it is eligible for retry.
         */
        @Override
        public boolean handleAsIdempotent(final HttpRequest request) {
            return true;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy