All downloads are free. The search and download functionality uses the official Maven repository.

org.dstadler.commons.net.UrlUtils Maven / Gradle / Ivy

There is a newer version: 1.3.4
Show newest version
package org.dstadler.commons.net;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSocketFactory;

import org.apache.commons.io.IOUtils;
import org.dstadler.commons.logging.jdk.LoggerFactory;


/**
 * Static helpers for downloading data via HTTP(S) GET and POST requests and for
 * checking whether an HTTP(S) resource can be reached.
 *
 * <p>All methods cast the result of {@link URL#openConnection()} to
 * {@link HttpURLConnection}, so only URLs whose connection is of that type are
 * supported.</p>
 *
 * @author dominik.stadler
 */
public class UrlUtils {
    private static final Logger LOGGER = LoggerFactory.make();

    /*
     * The first n bytes of data to print to FINE in retrieveRawInternalPost()
     */
    private static final int REPORT_PEEK_COUNT = 200;

    /*
     * Prefix of the Content-Type parameter which declares the character set of the body
     */
    private static final String CHARSET_PARAM = "charset=";

    /**
     * Download data from an URL and decode it using the platform default charset.
     *
     * @param sUrl The full URL used to download the content.
     * @param timeout The timeout in milliseconds that is used for both
     *      connection timeout and read timeout. If zero, no timeouts are set on the connection.
     *
     * @return The resulting data, e.g. a HTML string.
     *
     * @throws IOException If the URL is malformed, the request fails or the response code
     *      is not one of 200, 201 or 202.
     */
    public static String retrieveData(String sUrl, int timeout) throws IOException {
        return retrieveData(sUrl, null, timeout);
    }

    /**
     * Download data from an URL, if necessary converting from a character encoding.
     *
     * @param sUrl The full URL used to download the content.
     * @param encoding An encoding, e.g. UTF-8, ISO-8859-15. Can be null, in which case
     *      the platform default charset is used.
     * @param timeout The timeout in milliseconds that is used for both
     *      connection timeout and read timeout. If zero, no timeouts are set on the connection.
     *
     * @return The resulting data, e.g. a HTML string.
     *
     * @throws IOException If the URL is malformed, the request fails, the response code
     *      is not one of 200, 201 or 202, or the encoding is not supported.
     */
    public static String retrieveData(String sUrl, String encoding, int timeout) throws IOException {
        return retrieveData(sUrl, encoding, timeout, null);
    }

    /**
     * Download data from an URL, if necessary converting from a character encoding.
     *
     * @param sUrl The full URL used to download the content.
     * @param encoding An encoding, e.g. UTF-8, ISO-8859-15. Can be null, in which case
     *      the platform default charset is used.
     * @param timeout The timeout in milliseconds that is used for both
     *      connection timeout and read timeout. If zero, no timeouts are set on the connection.
     * @param sslFactory The SSLFactory to use for the connection, this allows to support custom SSL certificates.
     *      Can be null; it is only applied to HTTPS connections.
     *
     * @return The resulting data, e.g. a HTML string.
     *
     * @throws IOException If the URL is malformed, the request fails, the response code
     *      is not one of 200, 201 or 202, or the encoding is not supported.
     */
    public static String retrieveData(String sUrl, String encoding, int timeout, SSLSocketFactory sslFactory) throws IOException {
        return decode(retrieveRawData(sUrl, timeout, sslFactory), encoding);
    }

    /**
     * Download data from an URL and return the raw bytes.
     *
     * @param sUrl The full URL used to download the content.
     * @param timeout The timeout in milliseconds that is used for both
     *      connection timeout and read timeout. If zero, no timeouts are set on the connection.
     *
     * @return The resulting data, e.g. a HTML string as byte array.
     *
     * @throws IOException If the URL is malformed, the request fails or the response code
     *      is not one of 200, 201 or 202.
     */
    public static byte[] retrieveRawData(String sUrl, int timeout) throws IOException {
        return retrieveRawData(sUrl, timeout, null);
    }

    /**
     * Download data from an URL and return the raw bytes.
     *
     * <p>Note that, unlike the POST variants which accept any 2xx response code,
     * this method only accepts the response codes 200, 201 and 202.</p>
     *
     * @param sUrl The full URL used to download the content.
     * @param timeout The timeout in milliseconds that is used for both
     *      connection timeout and read timeout. If zero, no timeouts are set on the connection.
     * @param sslFactory The SSLFactory to use for the connection, this allows to support custom SSL certificates.
     *      Can be null; it is only applied to HTTPS connections.
     *
     * @return The resulting data, e.g. a HTML string as byte array.
     *
     * @throws IOException If the URL is malformed, the request fails or the response code
     *      is not one of 200, 201 or 202.
     */
    public static byte[] retrieveRawData(String sUrl, int timeout, SSLSocketFactory sslFactory) throws IOException {
        URL url = new URL(sUrl);

        LOGGER.fine("Using the following URL for retrieving the data: " + url.toString());

        HttpURLConnection conn = (HttpURLConnection) url.openConnection();

        // set specified timeout if non-zero
        if (timeout != 0) {
            conn.setConnectTimeout(timeout);
            conn.setReadTimeout(timeout);
        }

        try {
            conn.setDoOutput(false);
            conn.setDoInput(true);

            applySslFactory(conn, sslFactory);

            conn.connect();
            int code = conn.getResponseCode();
            if (code != HttpURLConnection.HTTP_OK &&
                    code != HttpURLConnection.HTTP_CREATED &&
                    code != HttpURLConnection.HTTP_ACCEPTED) {

                String msg = "Error " + code + " returned while retrieving response for url '" + url
                        + "' message from client: " + conn.getResponseMessage();
                LOGGER.warning(msg);

                throw new IOException(msg);
            }

            try (InputStream strm = conn.getInputStream()) {
                return IOUtils.toByteArray(strm);
            }
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Download data from an URL with a POST request, if necessary converting from a character encoding.
     *
     * @param sUrl The full URL used to download the content.
     * @param encoding An encoding used to decode the response, e.g. UTF-8, ISO-8859-15. Can be null,
     *      in which case the platform default charset is used.
     * @param postRequestBody the body of the POST request, e.g. request parameters; must not be null
     * @param contentType the content-type of the POST request; may be null. If it declares a
     *      {@code charset} parameter, the request body is encoded with that charset, otherwise ISO-8859-1 is used.
     * @param timeout The timeout in milliseconds that is used for both connection timeout and read timeout;
     *      must not be zero.
     * @return The body of the response, decoded as string.
     * @throws IOException If the URL is malformed, the request fails, the response code is not
     *      a 2xx code, or the encoding is not supported.
     * @throws IllegalArgumentException If the request body is null or the timeout is zero.
     */
    public static String retrieveDataPost(String sUrl, String encoding, String postRequestBody, String contentType, int timeout) throws IOException {
        return retrieveStringInternalPost(sUrl, encoding, postRequestBody, contentType, timeout, null);
    }

    /**
     * Download data from an URL with a POST request, if necessary converting from a character encoding.
     *
     * @param sUrl The full URL used to download the content.
     * @param encoding An encoding used to decode the response, e.g. UTF-8, ISO-8859-15. Can be null,
     *      in which case the platform default charset is used.
     * @param postRequestBody the body of the POST request, e.g. request parameters; must not be null
     * @param contentType the content-type of the POST request; may be null. If it declares a
     *      {@code charset} parameter, the request body is encoded with that charset, otherwise ISO-8859-1 is used.
     * @param timeout The timeout in milliseconds that is used for both connection timeout and read timeout;
     *      must not be zero.
     * @param sslFactory The SSLFactory to use for the connection, this allows to support custom SSL certificates.
     *      Can be null; it is only applied to HTTPS connections.
     * @return The body of the response, decoded as string.
     * @throws IOException If the URL is malformed, the request fails, the response code is not
     *      a 2xx code, or the encoding is not supported.
     * @throws IllegalArgumentException If the request body is null or the timeout is zero.
     */
    public static String retrieveDataPost(String sUrl, String encoding, String postRequestBody, String contentType, int timeout, SSLSocketFactory sslFactory) throws IOException {
        return retrieveStringInternalPost(sUrl, encoding, postRequestBody, contentType, timeout, sslFactory);
    }

    /*
     * Send the POST request and decode the response body with the given encoding.
     */
    private static String retrieveStringInternalPost(String sUrl, String encoding, String postRequestBody, String contentType, int timeout, SSLSocketFactory sslFactory) throws IOException {
        return decode(retrieveRawInternalPost(sUrl, postRequestBody, contentType, timeout, sslFactory), encoding);
    }

    /*
     * Send the POST request and return the raw bytes of the response body.
     */
    private static byte[] retrieveRawInternalPost(String sUrl, String postRequestBody, String contentType, int timeout, SSLSocketFactory sslFactory) throws IOException {
        if (postRequestBody == null) {
            throw new IllegalArgumentException("POST request body must not be null");
        }

        URL url = new URL(sUrl);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();

        // Validate and apply the settings before the try-block: nothing has been sent at this
        // point, so a rejected argument must not lead to any use of the connection.
        prepareConnection(connection, timeout, sslFactory);

        try {
            writePostRequest(connection, postRequestBody, contentType);

            connection.connect();
            int responseCode = connection.getResponseCode();

            // Log the headers only once a response was received. Doing this unconditionally
            // in the finally-block would query the headers of connections which never got
            // as far as sending the request.
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.log(Level.FINE, "Retrieved URL: " + url + ", header fields: " + connection.getHeaderFields());
            }

            if (!responseCodeValid(responseCode)) {
                String message = "Error " + responseCode + " returned while retrieving response for url " + url + ", response message: " + connection.getResponseMessage();
                LOGGER.warning(message);
                throw new IOException(message);
            }

            // Do not pre-size a buffer from the Content-Length header, the value is
            // controlled by the remote side.
            byte[] data;
            try (InputStream in = connection.getInputStream()) {
                data = IOUtils.toByteArray(in);
            }

            if (LOGGER.isLoggable(Level.FINE)) {
                String peek = new String(data, 0, Math.min(data.length, REPORT_PEEK_COUNT), StandardCharsets.US_ASCII);
                LOGGER.log(Level.FINE, "Received data, size: " + data.length + " (" + connection.getContentLength() + ") first bytes: " +
                        replaceInvalidChars(peek));
            }

            return data;
        } finally {
            connection.disconnect();
        }
    }

    /*
     * Convert the downloaded bytes to a string, using the platform default
     * charset if no encoding is specified.
     */
    private static String decode(byte[] rawData, String encoding) throws IOException {
        if (encoding == null) {
            return new String(rawData);  // NOSONAR - deliberately uses the platform default charset
        }

        return new String(rawData, encoding);
    }

    /*
     * helper for logging binary content, replaces control characters (code below 32) with '.'
     */
    private static String replaceInvalidChars(String substring) {
        StringBuilder builder = new StringBuilder(substring.length());
        for (char c : substring.toCharArray()) {
            builder.append(c < 32 ? '.' : c);
        }
        return builder.toString();
    }

    /*
     * helper to decide if a response code is considered valid for retrieveDataPost,
     * i.e. if it is one of the 2xx codes.
     */
    private static boolean responseCodeValid(int responseCode) {
        return responseCode / 100 == HttpURLConnection.HTTP_OK / 100;
    }

    /*
     * Set the given SSLSocketFactory on the connection if it is an HTTPS connection
     * and a factory is provided, otherwise do nothing.
     */
    private static void applySslFactory(URLConnection connection, SSLSocketFactory sslFactory) {
        if (connection instanceof HttpsURLConnection && sslFactory != null) {
            ((HttpsURLConnection) connection).setSSLSocketFactory(sslFactory);
        }
    }

    /*
     * Prepare connection by setting connectTimeout and readTimeout to timeout,
     * doOutput to false and
     * doInput to true.
     * Throws IllegalArgumentException on zero (infinite) timeout.
     */
    private static void prepareConnection(URLConnection connection, int timeout, SSLSocketFactory sslFactory) {
        if (timeout == 0) {
            throw new IllegalArgumentException("Zero (infinite) timeouts not permitted");
        }
        connection.setConnectTimeout(timeout);
        connection.setReadTimeout(timeout);
        connection.setDoInput(true); // whether we want to read from the connection
        connection.setDoOutput(false); // whether we want to write to the connection

        applySslFactory(connection, sslFactory);
    }

    /*
     * Write POST request header and body
     */
    private static void writePostRequest(URLConnection connection, String postRequestBody, String contentType) throws IOException {
        connection.setDoOutput(true); // whether we want to write to the connection

        if (contentType != null) {
            connection.setRequestProperty("Content-Type", contentType);
        }

        // Encode explicitly: DataOutputStream.writeBytes() would simply drop the high
        // eight bits of every character and thus corrupt everything above U+00FF.
        byte[] body = postRequestBody.getBytes(requestCharset(contentType));

        // Note: Content-Length is set implicitly by URLConnection
        try (OutputStream out = connection.getOutputStream()) {
            out.write(body);
        }
    }

    /*
     * Determine the charset for encoding the POST body from the "charset" parameter
     * of the content type, e.g. "text/xml; charset=UTF-8".
     *
     * Falls back to ISO-8859-1 if no usable charset is declared. This produces the same
     * bytes for all characters up to U+00FF as the previous implementation, which wrote
     * the low eight bits of every character.
     */
    private static Charset requestCharset(String contentType) {
        if (contentType == null) {
            return StandardCharsets.ISO_8859_1;
        }

        for (String part : contentType.split(";")) {
            String param = part.trim();
            if (!param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                continue;
            }

            String name = param.substring(CHARSET_PARAM.length()).trim();
            // the parameter value may be a quoted string
            if (name.length() >= 2 && name.charAt(0) == '"' && name.charAt(name.length() - 1) == '"') {
                name = name.substring(1, name.length() - 1);
            }

            try {
                return Charset.forName(name);
            } catch (IllegalArgumentException e) {
                // covers both illegal and unsupported charset names, keep sending the
                // request instead of failing on a header that was accepted before
                LOGGER.log(Level.FINE, "Cannot use charset declared in content type '" + contentType + "', using ISO-8859-1 instead", e);
                return StandardCharsets.ISO_8859_1;
            }
        }

        return StandardCharsets.ISO_8859_1;
    }

    /**
     * Check if the HTTP resource specified by the destination URL is available.
     *
     * The hostname check of HTTPS connections is not disabled by this method.
     *
     * @param destinationUrl the destination URL to check for availability
     * @param fireRequest if true a request will be sent to the given URL in addition to opening the
     *        connection
     * @param timeout Timeout in milliseconds after which the call fails because of timeout.
     *        If zero, no timeouts are set on the connection.
     * @return true if a connection could be set up and the response was received
     * @throws IllegalArgumentException if the destination URL is invalid
     */
    public static boolean isAvailable(String destinationUrl, boolean fireRequest, int timeout) throws IllegalArgumentException {
        return isAvailable(destinationUrl, fireRequest, false, timeout);
    }

    /**
     * Check if the HTTP resource specified by the destination URL is available.
     *
     * @param destinationUrl the destination URL to check for availability
     * @param fireRequest if true a request will be sent to the given URL in addition to opening the
     *        connection
     * @param ignoreHTTPSHostCheck if specified true, a HostnameVerifier is registered which accepts all hostnames during SSL handshake
     * @param timeout Timeout in milliseconds after which the call fails because of timeout.
     *        If zero, no timeouts are set on the connection.
     * @return true if a connection could be set up and the response was received
     * @throws IllegalArgumentException if the destination URL is invalid
     */
    public static boolean isAvailable(String destinationUrl, boolean fireRequest, boolean ignoreHTTPSHostCheck, int timeout) throws IllegalArgumentException {
        return getAccessError(destinationUrl, fireRequest, ignoreHTTPSHostCheck, timeout, null) == null;
    }

    /**
     * Check if the HTTP resource specified by the destination URL is available.
     *
     * @param destinationUrl the destination URL to check for availability
     * @param fireRequest if true a request will be sent to the given URL in addition to opening the
     *        connection
     * @param ignoreHTTPSHostCheck if specified true, a HostnameVerifier is registered which accepts all hostnames during SSL handshake
     * @param timeout Timeout in milliseconds after which the call fails because of timeout.
     *        If zero, no timeouts are set on the connection.
     * @param sslFactory The SSLFactory to use for the connection, this allows to support custom SSL certificates.
     *        Can be null; it is only applied to HTTPS connections.
     * @return true if a connection could be set up and the response was received
     * @throws IllegalArgumentException if the destination URL is invalid
     */
    public static boolean isAvailable(String destinationUrl, boolean fireRequest, boolean ignoreHTTPSHostCheck, int timeout, SSLSocketFactory sslFactory) throws IllegalArgumentException {
        return getAccessError(destinationUrl, fireRequest, ignoreHTTPSHostCheck, timeout, sslFactory) == null;
    }

    /**
     * Try to access the HTTP resource specified by the destination URL and
     * report the reason if this fails.
     *
     * @param destinationUrl the destination URL to check for availability
     * @param fireRequest if true a request will be sent to the given URL in addition to opening the
     *        connection
     * @param ignoreHTTPSHostCheck if specified true, a HostnameVerifier is registered which accepts all hostnames during SSL handshake
     * @param timeout Timeout in milliseconds after which the call fails because of timeout.
     *        If zero, no timeouts are set on the connection.
     * @return null if connection works, an error message if some problem happens.
     * @throws IllegalArgumentException if the destination URL is invalid
     */
    public static String getAccessError(String destinationUrl, boolean fireRequest, boolean ignoreHTTPSHostCheck, int timeout) throws IllegalArgumentException {
        return getAccessError(destinationUrl, fireRequest, ignoreHTTPSHostCheck, timeout, null);
    }

    /**
     * Try to access the HTTP resource specified by the destination URL and
     * report the reason if this fails.
     *
     * @param destinationUrl the destination URL to check for availability
     * @param fireRequest if true a request will be sent to the given URL in addition to opening the
     *        connection
     * @param ignoreHTTPSHostCheck if specified true, a HostnameVerifier is registered which accepts all hostnames during SSL handshake
     * @param timeout Timeout in milliseconds after which the call fails because of timeout.
     *        If zero, no timeouts are set on the connection.
     * @param sslFactory The SSLFactory to use for the connection, this allows to support custom SSL certificates.
     *        Can be null; it is only applied to HTTPS connections.
     * @return null if connection works, otherwise an error message consisting of the class name
     *        and the message of the exception which was caught.
     * @throws IllegalArgumentException if the destination URL is invalid
     */
    public static String getAccessError(String destinationUrl, boolean fireRequest, boolean ignoreHTTPSHostCheck, int timeout, SSLSocketFactory sslFactory) throws IllegalArgumentException {
        URL url;
        try {
            url = new URL(destinationUrl);
        } catch (MalformedURLException e) {
            throw new IllegalArgumentException("Invalid destination URL", e);
        }

        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) url.openConnection();

            // set specified timeout if non-zero
            if (timeout != 0) {
                conn.setConnectTimeout(timeout);
                conn.setReadTimeout(timeout);
            }

            if (ignoreHTTPSHostCheck && conn instanceof HttpsURLConnection) {
                ((HttpsURLConnection) conn).setHostnameVerifier(new AllowingHostnameVerifier());
            }
            applySslFactory(conn, sslFactory);

            conn.setDoOutput(false);
            conn.setDoInput(true);

            /* if connecting is not possible this will throw an exception, e.g. connection refused */
            conn.connect();

            /* dotNet processes require a first request to be sent to initialize the application */
            if (fireRequest) {
                try (InputStream is = conn.getInputStream()) {
                    // nothing to do here, closing is handled by try
                }
            }

            /* connecting is possible, so there is no error to report */
            return null;
        } catch (IOException e) {
            // any I/O problem ends up here, not only refused connections, so report the actual cause
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.log(Level.FINE, "Connection attempt to '" + destinationUrl + "' failed.", e);
            }

            /* exception is thrown -> server not available */
            return e.getClass().getName() + ": " + e.getMessage();
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy