
us.codecraft.webmagic.downloader.HttpClientGenerator Maven / Gradle / Ivy
package us.codecraft.webmagic.downloader;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.client.CookieStore;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.config.SocketConfig;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.*;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.protocol.HttpContext;
import us.codecraft.webmagic.Site;
import java.io.IOException;
import java.util.Map;
/**
* @author [email protected]
* @since 0.4.0
*/
public class HttpClientGenerator {
private PoolingHttpClientConnectionManager connectionManager;
public HttpClientGenerator() {
Registry reg = RegistryBuilder.create()
.register("http", PlainConnectionSocketFactory.INSTANCE)
.register("https", SSLConnectionSocketFactory.getSocketFactory())
.build();
connectionManager = new PoolingHttpClientConnectionManager(reg);
connectionManager.setDefaultMaxPerRoute(100);
}
public HttpClientGenerator setPoolSize(int poolSize) {
connectionManager.setMaxTotal(poolSize);
return this;
}
public CloseableHttpClient getClient(Site site) {
return generateClient(site);
}
private CloseableHttpClient generateClient(Site site) {
HttpClientBuilder httpClientBuilder = HttpClients.custom().setConnectionManager(connectionManager);
if (site != null && site.getUserAgent() != null) {
httpClientBuilder.setUserAgent(site.getUserAgent());
} else {
httpClientBuilder.setUserAgent("");
}
if (site == null || site.isUseGzip()) {
httpClientBuilder.addInterceptorFirst(new HttpRequestInterceptor() {
public void process(
final HttpRequest request,
final HttpContext context) throws HttpException, IOException {
if (!request.containsHeader("Accept-Encoding")) {
request.addHeader("Accept-Encoding", "gzip");
}
}
});
}
SocketConfig socketConfig = SocketConfig.custom().setSoKeepAlive(true).setTcpNoDelay(true).build();
httpClientBuilder.setDefaultSocketConfig(socketConfig);
if (site != null) {
httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true));
}
generateCookie(httpClientBuilder, site);
return httpClientBuilder.build();
}
private void generateCookie(HttpClientBuilder httpClientBuilder, Site site) {
CookieStore cookieStore = new BasicCookieStore();
for (Map.Entry cookieEntry : site.getCookies().entrySet()) {
BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
cookie.setDomain(site.getDomain());
cookieStore.addCookie(cookie);
}
for (Map.Entry> domainEntry : site.getAllCookies().entrySet()) {
for (Map.Entry cookieEntry : domainEntry.getValue().entrySet()) {
BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
cookie.setDomain(domainEntry.getKey());
cookieStore.addCookie(cookie);
}
}
httpClientBuilder.setDefaultCookieStore(cookieStore);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy