All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.europeanaconnect.erds.HTTPResolverMultiThreaded Maven / Gradle / Ivy

/* *********************************************************************
 * Class HTTPResolverMultiThreaded
 * Copyright (c) 2010-2013, German National Library / Deutsche Nationalbibliothek
 * Adickesallee 1, D-60322 Frankfurt am Main, Federal Republic of Germany 
 *
 * This program is free software.
 * For your convenience it is dual licensed.
 * You can redistribute it and/or modify it under the terms of
 * one of the following licenses:
 * 
 * 1.)
 * The GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 * You should have received a copy of the GNU General Public License
 * along with this program (gpl-3.0.txt); if not please read
 * http://www.gnu.org/licenses/gpl.html
 * 
 * 2.)
 * The European Union Public Licence as published by
 * The European Commission (executive body of the European Union);
 * either version 1.1 of the License, or (at your option) any later version.
 * You should have received a copy of the European Union Public Licence
 * along with this program (eupl_v1.1_en.pdf); if not please read
 * http://www.osor.eu/eupl
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the above licenses for more details.
 * 
 * @author Nuno Freire -- National Library of Portugal
 * 
 **********************************************************************/
package eu.europeanaconnect.erds;

import java.io.IOException;
import java.util.List;

import javax.net.ssl.SSLHandshakeException;

import org.apache.http.Header;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.NoHttpResponseException;
import org.apache.http.StatusLine;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HttpContext;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.europeanaconnect.erds.ResolverException.ResolverExceptionCode;

/**
 * A Resolver that queries a remote server via HTTP.
 * It fetches the URL by reading the header of the redirect-response of the remote resolver.
 * It supports multiple simultaneous connections (pooled connection manager) and
 * request retries on I/O errors.
 * <p>
 * Redirects are deliberately NOT followed by the HTTP client: the target of the
 * redirect (the "Location" header) is the resolving result.
 * <p>
 * Every request is released again after its status line and headers have been
 * evaluated, so that no pooled connection stays leased once
 * {@link #getResponse(ResolverRequest)} has returned.
 *
 * @author Nuno Freire (Original code)
 * @author Karaca Kocer (Small corrections)
 */
public class HTTPResolverMultiThreaded implements DataProvider {
    private static final Log LOGGER = LogFactory.getLog(HTTPResolverMultiThreaded.class);

    /** Name of the HTTP response header that carries the redirect target. */
    private static final String LOCATION_HEADER = "Location";

    /** URL template of the remote resolver; "$identifier" is replaced by the requested identifier. */
    protected String requestUrlPattern;
    protected String id;
    protected String label;
    protected List supportedNamespaces;
    protected String identifierPattern;
    //string to use as label during connection
    protected String userAgentString = "The European Resolution Discovery Service";
    protected DefaultHttpClient httpClient = null;
    //TODO: take those constants into Spring configuration? (Karaca 20100805)
    protected static final int MAX_CONNECTIONS         = 250;
    protected static final int CONN_PER_ROUTE          = 250;
    protected static final int MAXIMUM_REQUEST_RETRIES = 3;
    protected static final int CONECTION_TIMEOUT       = 5000;
    protected static final int SOCKET_TIMEOUT          = 5000; //both reduced 15000 -> 5000 Karaca 20130402

    /**
     * Creates the resolver together with its pooled HTTP client.
     * The client is configured with the connect and socket timeouts defined by the
     * constants of this class, does not follow redirects by itself, and retries
     * failed requests as decided by the retry handler of this class.
     *
     * @see <a href="https://hc.apache.org/">Apache HttpComponents</a>
     */
    public HTTPResolverMultiThreaded() {
        this.httpClient = new DefaultHttpClient(createConnectionManager(), new BasicHttpParams());

        HttpParams clientParams = this.httpClient.getParams();
        HttpConnectionParams.setConnectionTimeout(clientParams, CONECTION_TIMEOUT);
        HttpConnectionParams.setSoTimeout(clientParams, SOCKET_TIMEOUT);
        // The redirect target itself is the answer we are looking for: never follow it.
        clientParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, false);
        clientParams.setBooleanParameter(ClientPNames.REJECT_RELATIVE_REDIRECT, false);
        clientParams.setParameter(CoreProtocolPNames.USER_AGENT, this.userAgentString);

        this.httpClient.setHttpRequestRetryHandler(new BoundedRetryHandler());
    }

    /**
     * Builds the pooled connection manager for plain HTTP (port 80) and HTTPS (port 443).
     *
     * @return A connection manager limited to MAX_CONNECTIONS in total and
     *         CONN_PER_ROUTE per route.
     */
    private static ClientConnectionManager createConnectionManager() {
        SchemeRegistry schemeRegistry = new SchemeRegistry();
        schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
        schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));

        PoolingClientConnectionManager pool = new PoolingClientConnectionManager(schemeRegistry);
        pool.setDefaultMaxPerRoute(CONN_PER_ROUTE);
        pool.setMaxTotal(MAX_CONNECTIONS);
        return pool;
    }

    /**
     * Decides whether a request that failed with an I/O error is executed again.
     * It is a static nested class so that it does not keep a reference to the resolver.
     */
    private static final class BoundedRetryHandler implements HttpRequestRetryHandler {
        @Override
        public boolean retryRequest(IOException exception, int executionCount, HttpContext context) {
            if (executionCount >= MAXIMUM_REQUEST_RETRIES) {
                // Do not retry if over max retry count
                return false;
            }
            if (exception instanceof NoHttpResponseException) {
                // Retry if the server dropped connection on us
                return true;
            }
            if (exception instanceof SSLHandshakeException) {
                // Do not retry on SSL handshake exception
                return false;
            }
            // Requests without an entity (body) are considered idempotent and may be repeated.
            HttpRequest request = (HttpRequest) context.getAttribute(ExecutionContext.HTTP_REQUEST);
            return !(request instanceof HttpEntityEnclosingRequest);
        }
    }

    /**
     * Asks a remote Resolver via HTTP and fetches the URL
     * out of the header of the redirect response of the remote resolver
     * using the "Location" attribute.
     * <ul>
     * <li>3xx: the value of the "Location" header is returned as URL.</li>
     * <li>below 300: the request URL itself is returned (it was a real link, not an identifier).</li>
     * <li>4xx / 5xx: a ResolverException is thrown, see {@link #handleHttpErrorCodes(int)}.</li>
     * </ul>
     * The request is always released before this method returns, whatever the outcome.
     *
     * @param resolverRequest A eu.europeanaconnect.erds.ResolverRequest object defining the user request.
     * @return A eu.europeanaconnect.erds.ResolverResponse object that contains the resolving information.
     * @throws ResolverException Exception defining the error condition.
     *
     * @see DataProvider#getResponse(ResolverRequest)
     * @see eu.europeanaconnect.erds.ResolverRequest
     * @see eu.europeanaconnect.erds.ResolverResponse
     * @see eu.europeanaconnect.erds.ResolverException
     *
     * @since 17.03.2010
     */
    @Override
    public ResolverResponse getResponse(ResolverRequest resolverRequest) throws ResolverException {
        String url = getRequestUrl(resolverRequest);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("URL = " + url);
        }
        HttpGet getMethod = new HttpGet(url);

        try {
            HttpResponse httpResponse = executeRequest(getMethod, url);
            return evaluateResponse(httpResponse, url);
        } finally {
            // Only the status line and the headers are needed. The response body is never
            // read, so the request must be released explicitly; otherwise the connection
            // of every response that carries a body stays leased and the pool runs dry.
            getMethod.releaseConnection();
        }
    }

    /**
     * Executes the GET request and maps every failure to a ResolverException.
     *
     * @param getMethod The request to execute.
     * @param url The requested URL (used for log messages only).
     * @return The HTTP response of the remote server.
     * @throws ResolverException If the request could not be executed.
     */
    private HttpResponse executeRequest(HttpGet getMethod, String url) throws ResolverException {
        try {
            HttpResponse httpResponse = this.httpClient.execute(getMethod);
            LOGGER.debug("Operation GET successfull");
            return httpResponse;
        } catch (ClientProtocolException e) {
            // The throwable is handed to the logger so that the real stack trace is recorded.
            LOGGER.error("A ClientProtocolException occurred while requesting " + url, e);
            throw new ResolverException(this.id, ResolverExceptionCode.HTTP_PROTOCOL_ERROR, e);
        } catch (IOException e) {
            LOGGER.error("An IOException occurred while requesting " + url, e);
            throw new ResolverException(this.id, ResolverExceptionCode.IO_ERROR, e);
        } catch (RuntimeException e) {
            LOGGER.error("A RuntimeException occurred while requesting " + url, e);
            throw new ResolverException(this.id, ResolverExceptionCode.SEVERE_RUNTIME_ERROR, e);
        } catch (Exception e) {
            LOGGER.error("An Exception occurred while requesting " + url, e);
            throw new ResolverException(this.id, ResolverExceptionCode.UNKNOWN_ERROR, e);
        }
    }

    /**
     * Translates the status code and the headers of the HTTP response into a ResolverResponse.
     *
     * @param httpResponse The response of the remote server.
     * @param url The URL that was requested.
     * @return The resolving result.
     * @throws ResolverException If the server answered with an error code or with a
     *         redirect that has no "Location" header.
     */
    private ResolverResponse evaluateResponse(HttpResponse httpResponse, String url) throws ResolverException {
        StatusLine statusLine = httpResponse.getStatusLine();
        int statusCode = statusLine.getStatusCode();
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("HTTP Status-code: " + statusCode);
        }

        ResolverResponse resolverResponse = new ResolverResponse();
        if ((statusCode > 299) && (statusCode < 400)) {
            //It is a redirect See: http://www.w3.org/Protocols/rfc2616/rfc2616.html
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Its a redirect: analyzing the HTTP Header");
            }
            Header location = httpResponse.getFirstHeader(LOCATION_HEADER);
            if (location == null) {
                LOGGER.error("Redirect response without Location header for URL: " + url);
                throw new ResolverException(this.id, ResolverExceptionCode.NO_REDIRECT_ERROR);
            }
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Location: " + location.getValue());
            }
            resolverResponse.setUrl(location.getValue());
        } else if (statusCode < 300) {
            //that was not an identifier but a real link
            resolverResponse.setUrl(url);
        } else {
            //server returned a 4xx or 5xx code -> handle the error
            handleHttpErrorCodes(statusCode);
        }
        return resolverResponse;
    }

    /**
     * Handles the HTTP status codes that are different than redirect (3xx).
     * Codes of 500 and above are reported as an error of the remote resolver,
     * codes from 400 to 499 as an invalid identifier. For all other codes this
     * method does nothing.
     *
     * @param statusCode HTTP Status code (See RFC 2616)
     * @throws ResolverException If the status code is 400 or above.
     * @see <a href="http://www.w3.org/Protocols/rfc2616/rfc2616.html">RFC 2616</a>
     */
    public void handleHttpErrorCodes(int statusCode) throws ResolverException {
        //See: http://www.w3.org/Protocols/rfc2616/rfc2616.html
        if (statusCode >= 500) {
            throw new ResolverException(this.id, ResolverExceptionCode.REMOTE_RESOLVER_ERROR);
        }
        if (statusCode >= 400) {
            throw new ResolverException(this.id, ResolverExceptionCode.INVALID_IDENTIFIER);
        }
        //It is not a server error (5xx), not a client error (4xx)
        //other codes should be checked earlier in algorithm. So you should not be here!
    }

    /**
     * @return The label.
     */
    public String getLabel() {
        return this.label;
    }

    /**
     * @param label Set the label.
     */
    public void setLabel(String label) {
        this.label = label;
    }

    /**
     * @return Returns the HTTP Client that connects to the server.
     * @see org.apache.http.client.HttpClient
     */
    public HttpClient getHttpClient() {
        return this.httpClient;
    }

    /**
     * Builds the URL to request by replacing every occurrence of "$identifier"
     * in the request URL pattern with the identifier of the request.
     * The identifier is inserted as it is (no URL encoding is applied here).
     *
     * @param resolverRequest The request that provides the identifier.
     * @return The URL of the request.
     */
    public String getRequestUrl(ResolverRequest resolverRequest) {
        return this.requestUrlPattern.replace("$identifier", resolverRequest.getIdentifier());
    }

    /**
     * @return The pattern of the request.
     */
    public String getRequestUrlPattern() {
        return this.requestUrlPattern;
    }

    /**
     * @param requestUrlPattern The URL pattern of the remote resolver, containing
     *        the placeholder "$identifier".
     */
    public void setRequestUrlPattern(String requestUrlPattern) {
        this.requestUrlPattern = requestUrlPattern;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public List getSupportedNamespaces() {
        return this.supportedNamespaces;
    }

    /**
     * @param supportedNamespaces The namespaces supported by this resolver.
     */
    public void setSupportedNamespaces(List supportedNamespaces) {
        this.supportedNamespaces = supportedNamespaces;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String getIdentifierPattern() {
        return this.identifierPattern;
    }

    /**
     * @param identifierPattern The identifier pattern of this resolver.
     */
    public void setIdentifierPattern(String identifierPattern) {
        this.identifierPattern = identifierPattern;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String getId() {
        return this.id;
    }

    /**
     * @param id The id of this resolver. It is also reported in every ResolverException
     *        thrown by this class.
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return the userAgentString
     */
    public String getUserAgentString() {
        return this.userAgentString;
    }

    /**
     * Sets the user agent and applies it to the HTTP client, so that it is
     * really sent with the following requests (the constructor has already
     * configured the client with the default value).
     * NOTE(review): presumably called once during configuration, before requests
     * are executed concurrently -- confirm against the callers.
     *
     * @param newString the userAgentString to set
     */
    public void setUserAgentString(String newString) {
        this.userAgentString = newString;
        if (this.httpClient != null) {
            this.httpClient.getParams().setParameter(CoreProtocolPNames.USER_AGENT, newString);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy