All downloads are free. Search and download functionality uses the official Maven repository.

gov.nasa.pds.harvest.util.file.FileDownloader Maven / Gradle / Ivy

package gov.nasa.pds.harvest.util.file;

import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import javax.net.ssl.SSLContext;

import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.BasicHttpClientConnectionManager;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.ssl.TrustStrategy;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import gov.nasa.pds.harvest.util.CloseUtils;

/**
 * Downloads files over HTTP(S), retrying failed attempts.
 * <p>
 * The caller decides at construction time whether SSL certificate and host
 * name verification is disabled for HTTPS connections (to support
 * self-signed certificates) or whether the client returned by
 * {@code HttpClients.createDefault()} is used.
 *
 * @author karpenko
 */
public class FileDownloader
{
    /** Maximum number of attempts made by {@link #download(String, File)}. */
    private static final int NUM_RETRIES = 3;

    /** Pause between two attempts, in milliseconds. */
    private static final long RETRY_DELAY_MS = 5000;

    private final Logger log;
    private final CloseableHttpClient httpClient;


    /**
     * Constructor
     * @param sslTrustAll If {@code true}, SSL certificate and host name
     * validation is disabled to support self-signed certificates.
     * If {@code false}, the default HTTP client is used.
     * @throws Exception if the HTTP client could not be created
     */
    public FileDownloader(boolean sslTrustAll) throws Exception
    {
        log = LogManager.getLogger(this.getClass());
        httpClient = createHttpClient(sslTrustAll);
    }


    /**
     * Download a file from a URL. On error the download is attempted again,
     * up to {@value #NUM_RETRIES} attempts in total, with a 5 second pause
     * between attempts.
     * @param fromUrl Download a file from this URL.
     * @param toFile Save to this file
     * @throws Exception if every attempt failed (the last failure is attached
     * as the cause), or if the thread is interrupted while waiting to retry
     */
    public void download(String fromUrl, File toFile) throws Exception
    {
        for(int attempt = 1; ; attempt++)
        {
            try
            {
                downloadOnce(fromUrl, toFile);
                return;
            }
            catch(Exception ex)
            {
                // Some exceptions carry no message; fall back to toString()
                // so the log line is never empty.
                String msg = ex.getMessage();
                log.error(msg != null ? msg : ex.toString());

                if(attempt >= NUM_RETRIES)
                {
                    // Keep the last failure as the cause so callers can see
                    // why the download ultimately failed.
                    throw new Exception("Could not download " + fromUrl, ex);
                }

                log.info("Will retry in 5 seconds");
                Thread.sleep(RETRY_DELAY_MS);
            }
        }
    }


    /**
     * Try downloading file once.
     * @param fromUrl source URL
     * @param toFile target file
     * @throws Exception if the request fails, the response status is not 200,
     * the response has no body, or the body cannot be written to the file
     */
    private void downloadOnce(String fromUrl, File toFile) throws Exception
    {
        InputStream is = null;
        FileOutputStream os = null;
        CloseableHttpResponse resp = null;

        log.info("Downloading " + fromUrl + " to " + toFile.getAbsolutePath());

        try
        {
            HttpGet httpGet = new HttpGet(fromUrl);
            resp = httpClient.execute(httpGet);
            StatusLine status = resp.getStatusLine();

            if(status.getStatusCode() != 200)
            {
                throw new Exception(status.getStatusCode()  + " - " + status.getReasonPhrase());
            }

            HttpEntity entity = resp.getEntity();
            if(entity == null)
            {
                // Fail with a clear message instead of a NullPointerException.
                throw new Exception("Response from " + fromUrl + " has no content");
            }

            is = entity.getContent();
            // The target file is opened only after the response was accepted.
            os = new FileOutputStream(toFile);
            is.transferTo(os);
        }
        finally
        {
            CloseUtils.close(os);
            CloseUtils.close(is);
            CloseUtils.close(resp);
        }
    }


    /**
     * Create the HTTP client used for all downloads.
     * @param sslTrustAll If {@code true}, build a client whose HTTPS socket
     * factory accepts every certificate and skips host name verification.
     * If {@code false}, return {@code HttpClients.createDefault()}.
     * @return HTTP client
     * @throws Exception if the SSL context could not be built
     */
    private CloseableHttpClient createHttpClient(boolean sslTrustAll) throws Exception
    {
        if(!sslTrustAll)
        {
            return HttpClients.createDefault();
        }

        // Trust strategy that accepts every certificate chain.
        TrustStrategy acceptingTrustStrategy = (cert, authType) -> true;
        SSLContext sslContext = SSLContexts.custom().loadTrustMaterial(null, acceptingTrustStrategy).build();
        SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE);

        Registry<ConnectionSocketFactory> sfRegistry = RegistryBuilder.<ConnectionSocketFactory>create()
                .register("https", sslsf)
                .register("http", new PlainConnectionSocketFactory())
                .build();

        BasicHttpClientConnectionManager connectionManager = new BasicHttpClientConnectionManager(sfRegistry);

        HttpClientBuilder clientBld = HttpClients.custom();
        // NOTE(review): an explicitly set connection manager presumably takes
        // precedence over this setting; the call is kept to preserve the
        // original configuration - confirm against HttpClientBuilder docs.
        clientBld.setSSLSocketFactory(sslsf);
        clientBld.setConnectionManager(connectionManager);

        return clientBld.build();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy