All downloads are FREE. The search and download functionality uses the official Maven repository.

eu.europeanaconnect.erds.HTTPResolverMultiThreaded Maven / Gradle / Ivy

Go to download

Java classes providing resolving functionality for Persistent Identifiers. Main focus is on National Bibliography Numbers, but some other known systems are also supported. See the official URN:NBN Resolver http://nbn-resolving.org or http://persid.org

The newest version!
/* *********************************************************************
 * Class HTTPResolverMultiThreaded
 * Copyright (c) 2010-2013, German National Library / Deutsche Nationalbibliothek
 * Adickesallee 1, D-60322 Frankfurt am Main, Federal Republic of Germany 
 *
 * This program is free software.
 * For your convenience it is dual licensed.
 * You can redistribute it and/or modify it under the terms of
 * one of the following licenses:
 * 
 * 1.)
 * The GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 * You should have received a copy of the GNU General Public License
 * along with this program (gpl-3.0.txt); if not please read
 * http://www.gnu.org/licenses/gpl.html
 * 
 * 2.)
 * The European Union Public Licence as published by
 * The European Commission (executive body of the European Union);
 * either version 1.1 of the License, or (at your option) any later version.
 * You should have received a copy of the European Union Public Licence
 * along with this program (eupl_v1.1_en.pdf); if not please read
 * http://www.osor.eu/eupl
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the above licenses for more details.
 * 
 * @author Nuno Freire -- National Library of Portugal
 * 
 **********************************************************************/
package eu.europeanaconnect.erds;

import java.io.IOException;
import java.util.List;

import javax.net.ssl.SSLHandshakeException;

import org.apache.http.Header;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.NoHttpResponseException;
import org.apache.http.StatusLine;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HttpContext;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.europeanaconnect.erds.ResolverException.ResolverExceptionCode;

/**
 * A Resolver that queries a remote server via HTTP.
 * It fetches the URL by reading the header of the redirect-response of the remote resolver.
 * It supports multiple simultaneous connections and request retries on error.
 * 
 * @author Nuno Freire (Original code)
 * @author Karaca Kocer (Small corrections)
 */
public class HTTPResolverMultiThreaded implements DataProvider {
    private static final Log LOGGER = LogFactory.getLog(HTTPResolverMultiThreaded.class);
    protected String requestUrlPattern;
    protected String id;
    protected String label;
    protected List supportedNamespaces;
    protected String identifierPattern;
    //string to use as label during connection
    protected String userAgentString = "The European Resolution Discovery Service";
    protected DefaultHttpClient httpClient = null;
    //TODO: take those constants into Spring configuration? (Karaca 20100805)
    protected static final int MAX_CONNECTIONS         = 250;
    protected static final int CONN_PER_ROUTE          = 250;
    protected static final int MAXIMUM_REQUEST_RETRIES = 3;
    protected static final int CONECTION_TIMEOUT       = 5000;
    protected static final int SOCKET_TIMEOUT          = 5000; //both reduced 15000 -> 5000 Karaca 20130402
    
    /**
     * {@link "https://hc.apache.org/"}
     */
    public HTTPResolverMultiThreaded() {
    	HttpParams params = new BasicHttpParams();
//    	ConnManagerParams.setMaxTotalConnections(params, MAX_CONNECTIONS);
//    	ConnPerRouteBean connPerRoute = new ConnPerRouteBean(CONN_PER_ROUTE);
//    	ConnManagerParams.setMaxConnectionsPerRoute(params, connPerRoute);

    	SchemeRegistry schemeRegistry = new SchemeRegistry();
       	schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
    	schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));

//    	ClientConnectionManager clientConnectionManager = new ThreadSafeClientConnManager(params, schemeRegistry);
    	ClientConnectionManager clientConnectionManager = new PoolingClientConnectionManager(schemeRegistry); 
    	((PoolingClientConnectionManager) clientConnectionManager).setDefaultMaxPerRoute(CONN_PER_ROUTE);
    	((PoolingClientConnectionManager) clientConnectionManager).setMaxTotal(MAX_CONNECTIONS);

    	this.httpClient = new DefaultHttpClient(clientConnectionManager, params);
        HttpConnectionParams.setConnectionTimeout(this.httpClient.getParams(), CONECTION_TIMEOUT);
        HttpConnectionParams.setSoTimeout(this.httpClient.getParams(), SOCKET_TIMEOUT);
        this.httpClient.getParams().setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, false);
        this.httpClient.getParams().setBooleanParameter(ClientPNames.REJECT_RELATIVE_REDIRECT, false);
        this.httpClient.getParams().setParameter(CoreProtocolPNames.USER_AGENT, this.userAgentString);        
        
        HttpRequestRetryHandler myRetryHandler = new HttpRequestRetryHandler() {
        	@Override
            public boolean retryRequest(IOException exception, int executionCount, HttpContext context) {
                if (executionCount >= MAXIMUM_REQUEST_RETRIES) {
                    // Do not retry if over max retry count
                    return false;
                }
                if (exception instanceof NoHttpResponseException) {
                    // Retry if the server dropped connection on us
                    return true;
                }
                if (exception instanceof SSLHandshakeException) {
                    // Do not retry on SSL handshake exception
                    return false;
                }
                HttpRequest request = (HttpRequest) context.getAttribute(ExecutionContext.HTTP_REQUEST);
                boolean idempotent = !(request instanceof HttpEntityEnclosingRequest); 
                if (idempotent) {
                    // Retry if the request is considered idempotent 
                    return true;
                }
                return false;
            }

        };
        this.httpClient.setHttpRequestRetryHandler(myRetryHandler);
    } 
    
    /**
     * Asks a remote Resolver via HTTP and fetches the URL
     * out of the header of the redirect response of the remote resolver
     * using the "Location" attribute.
     * 
     * @param resolverRequest A eu.europeanaconnect.erds.ResolverRequest object defining the user request.
     * @return A eu.europeanaconnect.erds.ResolverResponse object that contains the resolving information.
     * @throws ResolverException Exception defining the error condition.
     * 
     * @see DataProvider#getResponse(ResolverRequest)
     * @see eu.europeanaconnect.erds.ResolverRequest
     * @see eu.europeanaconnect.erds.ResolverResponse
     * @see eu.europeanaconnect.erds.ResolverException
     * 
     * @since 17.03.2010
     */
    @Override
    public ResolverResponse getResponse(ResolverRequest resolverRequest) throws ResolverException {
        ResolverResponse resolverResponse = new ResolverResponse();
        HttpResponse httpResponse = null; 
        String url = getRequestUrl(resolverRequest);
        if (LOGGER.isDebugEnabled()) LOGGER.debug("URL = " + url);
        HttpGet getMethod = new HttpGet(url);
 
        try {
        	httpResponse = this.httpClient.execute(getMethod);
        	LOGGER.debug("Operation GET successfull");
        } catch (ClientProtocolException e) {
            LOGGER.error("A ClientProtocolException occured!\nStacktrace:\n" + e.getMessage());
        	e.printStackTrace();
            throw new ResolverException(this.id, ResolverExceptionCode.HTTP_PROTOCOL_ERROR, e);
        } catch (IOException e) {
            LOGGER.error("An IOException occured!\nStacktrace:\n" + e.getMessage());
        	e.printStackTrace();
            throw new ResolverException(this.id, ResolverExceptionCode.IO_ERROR, e);
        } catch (RuntimeException e) {
            LOGGER.error("A RuntimeException occured!\nStacktrace:\n" + e.getMessage());
        	e.printStackTrace();
            throw new ResolverException(this.id, ResolverExceptionCode.SEVERE_RUNTIME_ERROR, e);
        } catch (Exception e) {
            LOGGER.error("An Exception occured!\nStacktrace:\n" + e.getMessage());
        	e.printStackTrace();
            throw new ResolverException(this.id, ResolverExceptionCode.UNKNOWN_ERROR, e);
        }
        
        StatusLine statusLine = httpResponse.getStatusLine();
        int statusCode = statusLine.getStatusCode();
        
        if (LOGGER.isDebugEnabled()) LOGGER.debug("HTTP Status-code: " + statusCode);
        if ((statusCode > 299) && (statusCode < 400)) {
        	//It is a redirect See: http://www.w3.org/Protocols/rfc2616/rfc2616.html
            if (LOGGER.isDebugEnabled()) LOGGER.debug("Its a redirect: analyzing the HTTP Header");
        	//logger.debug(getMethod.)
        	if (!httpResponse.containsHeader("Location")) {
        	    LOGGER.error("Header does not contain Location attribute!");
                throw new ResolverException(this.id, ResolverExceptionCode.NO_REDIRECT_ERROR);
        	}
        	if (LOGGER.isDebugEnabled()) LOGGER.debug("Analyzing redirect location");
            Header location = httpResponse.getFirstHeader("Location");
            if (location == null) {
                LOGGER.error("No redirect header for URL: " + url);
                throw new ResolverException(this.id, ResolverExceptionCode.NO_REDIRECT_ERROR);
            }
            if (LOGGER.isDebugEnabled()) LOGGER.debug("Location: " + location.getValue());
            resolverResponse.setUrl(location.getValue());
        } else {
        	if (statusCode < 300) {
        		//that was not an identifier but a real link
        		resolverResponse.setUrl(url);
        	} else {
        		//server returned a 4xx or 5xx code -> handle the error
                handleHttpErrorCodes(statusCode);
        	}
        }

        return resolverResponse;
    }
    
    /**
     * Handles the HTTP status codes that are different than redirect (3xx)
     * @param statusCode HTTP Status code (See RFC 2616)
     * {@link "http://www.w3.org/Protocols/rfc2616/rfc2616.html"}
     * @throws ResolverException
     */
    public void handleHttpErrorCodes(int statusCode) throws ResolverException {
        //See: http://www.w3.org/Protocols/rfc2616/rfc2616.html
        if (statusCode >= 500) {
            throw new ResolverException(this.id, ResolverExceptionCode.REMOTE_RESOLVER_ERROR);
        }
        if (statusCode >= 400) {
            throw new ResolverException(this.id, ResolverExceptionCode.INVALID_IDENTIFIER);
        }
        //It is not a server error (5xx), not a client error (4xx)
        //other codes should be checked earlier in algorithm. So you should not be here!
    }

    /**
     * @return The label.
     */
    public String getLabel() {
        return this.label;
    }

    /**
     * @param label Set the label.
     */
    public void setLabel(String label) {
        this.label = label;
    }

    /**
     * @return Returns the HTTP Client that connects to the server.
     * @see org.apache.http.client.HttpClient
     */
    public HttpClient getHttpClient() {
        return this.httpClient;
    }

    /**
     * @param resolverRequest
     * @return The URL of the request.
     */
    public String getRequestUrl(ResolverRequest resolverRequest) {
        return this.requestUrlPattern.replace("$identifier", resolverRequest.getIdentifier());
    }

    /**
     * @return The pattern of the request.
     */
    public String getRequestUrlPattern() {
        return this.requestUrlPattern;
    }

    /**
     * @param requestUrlPattern
     */
    public void setRequestUrlPattern(String requestUrlPattern) {
        this.requestUrlPattern = requestUrlPattern;
    }

    /**
     * {@inheritDoc} 
     */
    @Override
    public List getSupportedNamespaces() {
        return this.supportedNamespaces;
    }

    /**
     * @param supportedNamespaces
     */
    public void setSupportedNamespaces(List supportedNamespaces) {
        this.supportedNamespaces = supportedNamespaces;
    }

    /**
     * {@inheritDoc} 
     */
    @Override
    public String getIdentifierPattern() {
        return this.identifierPattern;
    }

    /**
     * @param identifierPattern
     */
    public void setIdentifierPattern(String identifierPattern) {
        this.identifierPattern = identifierPattern;
    }

    /**
     * {@inheritDoc} 
     */
    @Override
    public String getId() {
        return this.id;
    }

    /**
     * @param id
     */
    public void setId(String id) {
        this.id = id;
    }

	/**
	 * @return the userAgentString
	 */
	public String getUserAgentString() {
		return this.userAgentString;
	}

	/**
	 * @param newString the userAgentString to set
	 */
	public void setUserAgentString(String newString) {
		this.userAgentString = newString;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy