All downloads are free. The search and download functionality uses the official Maven repository.

com.metaeffekt.mirror.download.WebAccess Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.mirror.download;

import com.metaeffekt.mirror.Retry;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class WebAccess {

    private final static Logger LOG = LoggerFactory.getLogger(WebAccess.class);

    private HttpHost proxy;
    private CredentialsProvider credentialsProvider;

    /**
     * Set up a proxy in between the host to access (host, port, scheme, username, password).
* Username and password are optional, only used if authentication is requested by the proxy. * * @param scheme The proxy scheme (ex. http). * @param host The proxy host. * @param port The proxy port. * @param username Proxy user (if authentication is required). * @param password Proxy password (if authentication is required). */ public void setDownloaderProxyCredentials(String scheme, String host, int port, String username, String password) { LOG.debug("Setting up proxy credentials for downloader."); this.proxy = new HttpHost(host, port, scheme); this.credentialsProvider = new BasicCredentialsProvider(); this.credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password)); } private CloseableHttpClient createHttpClient() { final HttpClientBuilder httpClient = HttpClients.custom(); final RequestConfig requestConfig = RequestConfig.custom() .setConnectTimeout(60 * 1000) // max time to establish a connection with remote host/server .setConnectionRequestTimeout(60 * 1000) // time to wait for getting a connection from the connection manager/pool .setSocketTimeout(60 * 1000) // max time gap between two consecutive data packets while transferring data from server to client .build(); httpClient.setDefaultRequestConfig(requestConfig); // httpClient.disableCookieManagement(); // cookie rejection warnings if (credentialsProvider != null) { httpClient.setDefaultCredentialsProvider(credentialsProvider); } return httpClient.build(); } private RequestConfig createProxyRequestConfig() { if (proxy != null) { return RequestConfig.custom() .setProxy(proxy) .build(); } else { return RequestConfig.custom() .build(); } } public InputStream fetchResponseBodyFromUrlAsInputStream(URL url, Map requestHeaders) { LOG.info("Performing request to {}", url); return new Retry<>(() -> { try (CloseableHttpClient httpClient = createHttpClient()) { final HttpGet httpGet = new HttpGet(url.toURI()); httpGet.setConfig(createProxyRequestConfig()); if (requestHeaders 
!= null && !requestHeaders.isEmpty()) { for (Map.Entry header : requestHeaders.entrySet()) { httpGet.addHeader(header.getKey(), header.getValue()); } } final HttpResponse response = httpClient.execute(httpGet); final HttpEntity entity = response.getEntity(); return IOUtils.toBufferedInputStream(entity.getContent()); } catch (Exception e) { throw new RuntimeException("Unable to fetch response body from URL " + url.toString(), e); } }) .retryCount(3) .withDelay(1000) .run(); } public List fetchResponseBodyFromUrlAsList(URL url, Map requestHeaders) { final InputStream inputStream = fetchResponseBodyFromUrlAsInputStream(url, requestHeaders); final LineIterator lineIterator; try { lineIterator = IOUtils.lineIterator(inputStream, StandardCharsets.UTF_8); } catch (Exception e) { throw new RuntimeException("Unable to read response body from URL " + url.toString(), e); } final List lines = new ArrayList<>(); while (lineIterator.hasNext()) { lines.add(lineIterator.next()); } return lines; } public List fetchResponseBodyFromUrlAsList(URL url) { return fetchResponseBodyFromUrlAsList(url, null); } public Document fetchResponseBodyFromUrlAsDocument(URL url) { return Jsoup.parse(String.join("", fetchResponseBodyFromUrlAsList(url))); } public void fetchResponseBodyFromUrlToFile(URL url, File file, Map requestHeaders) { final InputStream inputStream = fetchResponseBodyFromUrlAsInputStream(url, requestHeaders); try { if (!file.getParentFile().exists()) { file.getParentFile().mkdirs(); } file.createNewFile(); try (final FileOutputStream outputStream = new FileOutputStream(file)) { byte[] buffer = new byte[1024]; int bytesRead; while ((bytesRead = inputStream.read(buffer)) != -1) { outputStream.write(buffer, 0, bytesRead); } } } catch (Exception e) { throw new RuntimeException("Unable to write response from " + url.toString() + " to " + file.getAbsolutePath(), e); } finally { IOUtils.closeQuietly(inputStream); } } public void fetchResponseBodyFromUrlToFile(URL url, File file) { 
fetchResponseBodyFromUrlToFile(url, file, null); } public long fetchFileSizeFromUrl(URL url) { return fetchFileSizeFromUrl(url, true); } public long fetchFileSizeFromUrl(URL url, boolean downloadFallback) { return new Retry<>(() -> { try (CloseableHttpClient httpClient = createHttpClient()) { final HttpGet httpGet = new HttpGet(url.toURI()); httpGet.setConfig(createProxyRequestConfig()); final HttpResponse response = httpClient.execute(httpGet); final HttpEntity entity = response.getEntity(); if (entity.getContentLength() != -1) { return entity.getContentLength(); } if (!downloadFallback) { return -1; } LOG.debug("Response did not contain Content-Length header, falling back to fetching the whole file for URL {}", url); long size = 0; byte[] buffer = new byte[1024]; int bytesRead; while ((bytesRead = entity.getContent().read(buffer)) != -1) { size += bytesRead; } return size; } catch (Exception e) { throw new RuntimeException("Unable to fetch file size from URL " + url.toString(), e); } }) .retryCount(3) .withDelay(1000) .run() .longValue(); } public String buildGetRequest(String baseUrl, Map arguments) throws UnsupportedEncodingException { final StringBuilder url = new StringBuilder(); url.append(baseUrl); if (arguments != null) { boolean first = true; for (Map.Entry entry : arguments.entrySet()) { if (first) { url.append("?"); first = false; } else { url.append("&"); } url.append(URLEncoder.encode(entry.getKey(), "UTF-8")); url.append("="); url.append(URLEncoder.encode(entry.getValue(), "UTF-8")); } } return url.toString(); } public HttpHost getProxy() { return proxy; } public CredentialsProvider getCredentialsProvider() { return credentialsProvider; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy