All downloads are free. The search and download features use the official Maven repository.

org.nbnResolving.resolver.UrlResolverUtil Maven / Gradle / Ivy

package org.nbnResolving.resolver;

import java.net.URI;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;

/**
 * Checks whether a URL answers an HTTP HEAD request with one of a configured
 * set of acceptable status codes.
 *
 * <p>Automatic redirect handling of the underlying HTTP client is switched off.
 * Instead, {@link #isResponseValid(String)} follows at most one redirect itself
 * and judges the status code returned by the redirect target.
 *
 * @author kocer
 */
public class UrlResolverUtil {

    private static final Log LOGGER = LogFactory.getLog(UrlResolverUtil.class);

    /** HTTP status codes that count as a valid response. */
    private final List<Integer> validCodes;

    //string to use as label during connection
    protected String userAgentString = "German National Library URL-Checker";

    /** Shared client backed by a pooling connection manager. */
    private final HttpClient httpClient;

    /**
     * Builds the checker and its pooled HTTP client.
     *
     * @param newValidCodes      status codes that {@link #isResponseValid(String)} accepts
     * @param maxConnections     upper bound for the total number of pooled connections
     * @param connectionPerRoute default upper bound for pooled connections per route
     * @param connectionTimeout  connect timeout handed to
     *                           {@code HttpConnectionParams.setConnectionTimeout} (milliseconds)
     * @param socketTimeout      socket timeout handed to
     *                           {@code HttpConnectionParams.setSoTimeout} (milliseconds)
     */
    public UrlResolverUtil(List<Integer> newValidCodes, int maxConnections, int connectionPerRoute, int connectionTimeout, int socketTimeout) {
        this.validCodes = newValidCodes;

        SchemeRegistry schemeRegistry = new SchemeRegistry();
        schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
        schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));

        PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager(schemeRegistry);
        connectionManager.setDefaultMaxPerRoute(connectionPerRoute);
        connectionManager.setMaxTotal(maxConnections);

        this.httpClient = new DefaultHttpClient(connectionManager, new BasicHttpParams());

        HttpParams clientParams = this.httpClient.getParams();
        HttpConnectionParams.setConnectionTimeout(clientParams, connectionTimeout);
        HttpConnectionParams.setSoTimeout(clientParams, socketTimeout);
        // Redirects are followed manually in isResponseValid, so the client must not do it.
        clientParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, false);
        clientParams.setBooleanParameter(ClientPNames.REJECT_RELATIVE_REDIRECT, false);
        clientParams.setParameter(CoreProtocolPNames.USER_AGENT, this.userAgentString);

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("UrlResolverUtil initialised.");
        }
    }

    /**
     * Sends a HEAD request to the given URL and checks the resulting status code.
     *
     * <p>If the first answer is a 3xx status, the {@code Location} header is resolved
     * against {@code url} (so relative redirects work) and requested once; the status
     * code of that second answer is the one that gets checked. Further redirects are
     * not followed.
     *
     * @param url URL to check.
     * @return {@code true} if the HTTP response code for the given URL is in the list of
     *         valid codes; {@code false} otherwise, including when the URL is malformed,
     *         the request fails, or a redirect carries no usable {@code Location} header.
     */
    public boolean isResponseValid(final String url) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Sending HTTP HEAD Request. URL = " + url);
        }

        HttpResponse httpResponse = executeHead(url);
        if (httpResponse == null) {
            return false;
        }

        int statusCode = httpResponse.getStatusLine().getStatusCode();
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("HTTP Status-code: " + statusCode);
        }

        if ((statusCode > 299) && (statusCode < 400)) {
            //It is a redirect See: http://www.w3.org/Protocols/rfc2616/rfc2616.html
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Its a redirect: analyzing the HTTP Header");
            }
            Header location = httpResponse.getFirstHeader("Location");
            if (location == null || location.getValue() == null) {
                return false;
            }
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Analyzing redirect location");
                LOGGER.debug("Location: " + location.getValue());
            }

            httpResponse = executeHead(resolveRedirectTarget(url, location.getValue()));
            if (httpResponse == null) {
                return false;
            }
            statusCode = httpResponse.getStatusLine().getStatusCode();
        }

        return this.validCodes != null && this.validCodes.contains(Integer.valueOf(statusCode));
    }

    /**
     * Executes a single HEAD request.
     *
     * <p>Building the request happens inside the {@code try} so that a malformed or
     * {@code null} target is reported as a failed check instead of escaping as an exception.
     *
     * @param target absolute URL to request
     * @return the response (status line and headers stay readable after the connection
     *         has been released), or {@code null} if the request could not be built or executed
     */
    private HttpResponse executeHead(final String target) {
        HttpHead head = null;
        try {
            head = new HttpHead(target);
            HttpResponse response = this.httpClient.execute(head);
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("HEAD request successfull");
            }
            return response;
        } catch (Exception e) {
            // Unreachable or broken URLs are the expected case for a checker, so this is
            // only reported on debug level - but no longer dropped without a trace.
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("HEAD request failed. URL = " + target, e);
            }
            return null;
        } finally {
            if (head != null) {
                // Defensive: make sure the pooled connection is handed back in every case.
                head.releaseConnection();
            }
        }
    }

    /**
     * Turns a {@code Location} header value into an absolute URL.
     *
     * @param requestUrl URL of the request that produced the redirect
     * @param location   raw {@code Location} value, absolute or relative
     * @return the location resolved against {@code requestUrl}; the raw value if either
     *         string cannot be parsed as a URI (the follow-up request then fails and the
     *         check reports {@code false})
     */
    private static String resolveRedirectTarget(final String requestUrl, final String location) {
        try {
            URI base = URI.create(requestUrl);
            if ("".equals(base.getPath())) {
                // With an empty base path, a relative path would be glued straight
                // onto the host name; anchor the base at "/" first.
                base = base.resolve("/");
            }
            return base.resolve(location).toString();
        } catch (IllegalArgumentException e) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Could not resolve redirect location against request URL", e);
            }
            return location;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy