All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ar.com.zauber.leviathan.impl.BulkURIFetchers Maven / Gradle / Ivy

There is a newer version: 1.0.2
Show newest version
/**
 * Copyright (c) 2009-2014 Zauber S.A. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ar.com.zauber.leviathan.impl;

import static java.util.Arrays.*;

import java.util.HashMap;
import java.util.Map;

import org.apache.http.HttpVersion;
import org.apache.http.client.HttpClient;
import org.apache.http.client.params.ClientParamBean;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.params.ConnManagerParamBean;
import org.apache.http.conn.params.ConnPerRouteBean;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParamBean;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParamBean;
import org.apache.http.params.HttpProtocolParams;

import ar.com.zauber.commons.dao.Closure;
import ar.com.zauber.leviathan.api.URIFetcher;
import ar.com.zauber.leviathan.common.CharsetStrategy;
import ar.com.zauber.leviathan.impl.httpclient.HTTPClientURIFetcher;
import ar.com.zauber.leviathan.impl.httpclient.charset.ChainedCharsetStrategy;
import ar.com.zauber.leviathan.impl.httpclient.charset.DefaultHttpCharsetStrategy;
import ar.com.zauber.leviathan.impl.httpclient.charset.FixedCharsetStrategy;

/**
 * Helps with the creation of {@link BulkURIFetcher}s.
 *
 * @author Juan F. Codagnone
 * @since Oct 12, 2009
 */
public final class BulkURIFetchers {
    private static final HttpParams PARAMS = new BasicHttpParams();

    static {
        HttpConnectionParams.setConnectionTimeout(PARAMS, 20 * 1000);
        HttpProtocolParams.setVersion(PARAMS, HttpVersion.HTTP_1_1);
    }

    /** utility class */
    private BulkURIFetchers() {
        // void
    }
    
    public static URIFetcher createSafeHttpClientURIFetcher() {
        return createSafeHttpClientURIFetcher(null);
    }
    
    /** create a safe {@link URIFetcher} */
    public static URIFetcher createSafeHttpClientURIFetcher(final Closure paramsClosure) {
        return createSafeHttpClientURIFetcher(paramsClosure, null);
    }
    /** create a safe {@link URIFetcher} */
    public static URIFetcher createSafeHttpClientURIFetcher(final Closure paramsClosure,
            final Closure httpClientClosure) {
        final Map registries = new HashMap();
        registries.put("http", new Scheme("http", PlainSocketFactory.getSocketFactory(), 80));
        registries.put("https", new Scheme("https", SSLSocketFactory.getSocketFactory(), 443));
        final SchemeRegistry schemaRegistry = new SchemeRegistry();
        schemaRegistry.setItems(registries);
        
        final HttpParams params = createHttpParams();
        if(paramsClosure != null) {
            paramsClosure.execute(params);
        }
        final ClientConnectionManager cm = new ThreadSafeClientConnManager(params, schemaRegistry);
        final DefaultHttpClient httpclient = new DefaultHttpClient(cm, params);
        if(httpClientClosure != null) {
            httpClientClosure.execute(httpclient);
        }
        final CharsetStrategy charsetStrategy = new ChainedCharsetStrategy(asList(
                new DefaultHttpCharsetStrategy(), new FixedCharsetStrategy("utf-8")));
        
        return new HTTPClientURIFetcher(httpclient, charsetStrategy);
    }
    

    /** TODO we should load these from a Properties file. */
    private static HttpParams createHttpParams() {
        final HttpParams params = new BasicHttpParams();
        
        final HttpProtocolParamBean httpParams = new HttpProtocolParamBean(params);
        httpParams.setContentCharset("iso-8859-1");
        httpParams.setHttpElementCharset("iso-8859-1");
        httpParams.setUseExpectContinue(true);
        httpParams.setUserAgent("Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB6.4; "
                             +  "InfoPath.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; "
                             + ".NET CLR 3.5.30729; OfficeLiveConnector.1.3; OfficeLivePatch.0.0)");
        httpParams.setVersion(HttpVersion.HTTP_1_1);
        
        final HttpConnectionParamBean connectionParam = new HttpConnectionParamBean(params);
        connectionParam.setConnectionTimeout(10000);
        connectionParam.setSoTimeout(20000);
        connectionParam.setLinger(-1);
        connectionParam.setSocketBufferSize(8 * 1024);
        connectionParam.setStaleCheckingEnabled(true);
        connectionParam.setTcpNoDelay(true);
        
        final ClientParamBean clientParam = new ClientParamBean(params);
        clientParam.setHandleRedirects(true);
        clientParam.setRejectRelativeRedirect(false);
        clientParam.setMaxRedirects(10);
        clientParam.setAllowCircularRedirects(false);
        
        final ConnManagerParamBean connManagerParam = new ConnManagerParamBean(params);
        connManagerParam.setConnectionsPerRoute(new ConnPerRouteBean(5));
        connManagerParam.setMaxTotalConnections(100);
        
        final ConnManagerParamBean connConnectionParam = new ConnManagerParamBean(params);
        return params;
    }
    
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy