All downloads are free. The search and download features use the official Maven repository.

org.nbnResolving.resolver.UrlResolverUtil Maven / Gradle / Ivy

Go to download

Java classes providing resolving functionality for Persistent Identifiers. Main focus is on National Bibliography Numbers, but some other known systems are also supported. See the official URN:NBN Resolver http://nbn-resolving.org or http://persid.org

The newest version!
package org.nbnResolving.resolver;

import java.net.URI;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;

/**
 * Checks whether a URL answers an HTTP HEAD request with one of a configured
 * set of status codes.
 *
 * <p>The underlying HTTP client is configured not to follow redirects on its
 * own; {@link #isResponseValid(String)} follows at most one redirect hop itself
 * and evaluates the status code of the response it ends up with.
 *
 * @author kocer
 */
public class UrlResolverUtil {

    private static final Log LOGGER = LogFactory.getLog(UrlResolverUtil.class);

    /** Status codes regarded as valid; looked up with boxed {@code Integer} values. */
    private final List<?> validCodes;

    /**
     * Value sent as the HTTP {@code User-Agent}. It is copied into the client
     * parameters once, while the constructor runs.
     */
    protected String userAgentString = "German National Library URL-Checker";

    private final HttpClient httpClient;

    /**
     * Creates a checker backed by a pooling connection manager that serves
     * {@code http} (port 80) and {@code https} (port 443).
     *
     * @param newValidCodes      status codes for which {@link #isResponseValid(String)}
     *                           returns {@code true}; the list is stored by reference
     *                           and is queried with {@code Integer} values
     * @param maxConnections     maximum total number of pooled connections
     * @param connectionPerRoute default maximum number of connections per route
     * @param connectionTimeout  connection timeout handed to
     *                           {@code HttpConnectionParams.setConnectionTimeout}
     * @param socketTimeout      socket timeout handed to
     *                           {@code HttpConnectionParams.setSoTimeout}
     */
    public UrlResolverUtil(List<?> newValidCodes, int maxConnections, int connectionPerRoute, int connectionTimeout, int socketTimeout) {
        this.validCodes = newValidCodes;

        SchemeRegistry schemeRegistry = new SchemeRegistry();
        schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
        schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));

        PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager(schemeRegistry);
        connectionManager.setDefaultMaxPerRoute(connectionPerRoute);
        connectionManager.setMaxTotal(maxConnections);

        this.httpClient = new DefaultHttpClient(connectionManager, new BasicHttpParams());

        HttpParams clientParams = this.httpClient.getParams();
        HttpConnectionParams.setConnectionTimeout(clientParams, connectionTimeout);
        HttpConnectionParams.setSoTimeout(clientParams, socketTimeout);
        // Redirects are handled manually in isResponseValid so that the
        // intermediate status code and Location header can be inspected.
        clientParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, false);
        clientParams.setBooleanParameter(ClientPNames.REJECT_RELATIVE_REDIRECT, false);
        clientParams.setParameter(CoreProtocolPNames.USER_AGENT, this.userAgentString);

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("UrlResolverUtil initialised.");
        }
    }

    /**
     * Sends a HEAD request to the given URL and checks the resulting status code.
     *
     * <p>If the first response is a redirect (status 300-399) its
     * {@code Location} header is resolved against the requested URL, so both
     * absolute and relative locations are supported, and one further HEAD
     * request is sent to that target. Only this single hop is followed.
     *
     * @param url URL to check.
     * @return {@code true} if the HTTP response code for the given URL (or for
     *         the target of its redirect) is in the list of valid codes;
     *         {@code false} otherwise, including when the URL or the redirect
     *         location cannot be parsed, a redirect carries no
     *         {@code Location} header, or a request fails.
     */
    public boolean isResponseValid(final String url) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Sending HTTP HEAD Request. URL = " + url);
        }

        URI requestUri = parseUri(url);
        if (requestUri == null) {
            return false;
        }

        HttpResponse httpResponse = executeHead(requestUri);
        if (httpResponse == null) {
            return false;
        }

        StatusLine statusLine = httpResponse.getStatusLine();
        int statusCode = statusLine.getStatusCode();
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("HTTP Status-code: " + statusCode);
        }

        if ((statusCode > 299) && (statusCode < 400)) {
            // It is a redirect. See: http://www.w3.org/Protocols/rfc2616/rfc2616.html
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Its a redirect: analyzing the HTTP Header");
            }
            Header location = httpResponse.getFirstHeader("Location");
            if (location == null) {
                return false;
            }
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Location: " + location.getValue());
            }

            URI redirectUri = resolveRedirect(requestUri, location.getValue());
            if (redirectUri == null) {
                return false;
            }

            httpResponse = executeHead(redirectUri);
            if (httpResponse == null) {
                return false;
            }
            statusLine = httpResponse.getStatusLine();
            statusCode = statusLine.getStatusCode();
        }

        return this.validCodes.contains(Integer.valueOf(statusCode));
    }

    /** Parses {@code value} into a URI; returns {@code null} if it is null or malformed. */
    private static URI parseUri(String value) {
        try {
            return URI.create(value);
        } catch (RuntimeException e) {
            // URI.create throws NullPointerException / IllegalArgumentException.
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Not a usable URL: " + value, e);
            }
            return null;
        }
    }

    /** Resolves a Location header value against the request URI; returns {@code null} if it cannot be parsed. */
    private static URI resolveRedirect(URI requestUri, String location) {
        try {
            // An absolute location is returned unchanged; a relative one such
            // as ../dir/file.pdf is resolved against the requested URL.
            return requestUri.resolve(location);
        } catch (RuntimeException e) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Not a usable redirect location: " + location, e);
            }
            return null;
        }
    }

    /** Executes a HEAD request against {@code target}; returns {@code null} if the request fails for any reason. */
    private HttpResponse executeHead(URI target) {
        try {
            HttpResponse response = this.httpClient.execute(new HttpHead(target));
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("HEAD request successfull");
            }
            return response;
        } catch (Exception e) {
            // Any failure (I/O, protocol, illegal state) means "not valid" for
            // the caller; it is logged at debug level to keep a trace.
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("HEAD request failed. URL = " + target, e);
            }
            return null;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy