it.tidalwave.bluemarine2.downloader.impl.DefaultDownloader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of it-tidalwave-bluemarine2-downloader Show documentation
Show all versions of it-tidalwave-bluemarine2-downloader Show documentation
The component that is capable of downloading stuff from the internet.
/*
* #%L
* *********************************************************************************************************************
*
* blueMarine2 - Semantic Media Center
* http://bluemarine2.tidalwave.it - git clone https://[email protected]/tidalwave/bluemarine2-src.git
* %%
* Copyright (C) 2015 - 2016 Tidalwave s.a.s. (http://tidalwave.it)
* %%
*
* *********************************************************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* *********************************************************************************************************************
*
* $Id$
*
* *********************************************************************************************************************
* #L%
*/
package it.tidalwave.bluemarine2.downloader.impl;
import javax.annotation.Nonnull;
import javax.annotation.PostConstruct;
import javax.inject.Inject;
import java.util.Arrays;
import java.util.Date;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.ProtocolException;
import org.apache.http.client.RedirectStrategy;
import org.apache.http.client.cache.CacheResponseStatus;
import org.apache.http.client.cache.HttpCacheContext;
import org.apache.http.client.cache.HttpCacheEntry;
import org.apache.http.client.cache.Resource;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.message.BasicHeader;
import org.apache.http.protocol.HttpContext;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.cache.CacheConfig;
import org.apache.http.impl.client.cache.CachingHttpClients;
import org.apache.http.impl.client.cache.HeapResource;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import it.tidalwave.util.NotFoundException;
import it.tidalwave.messagebus.MessageBus;
import it.tidalwave.messagebus.annotation.ListensTo;
import it.tidalwave.messagebus.annotation.SimpleMessageSubscriber;
import it.tidalwave.bluemarine2.util.PowerOnNotification;
import it.tidalwave.bluemarine2.downloader.DownloadComplete;
import it.tidalwave.bluemarine2.downloader.DownloadComplete.Origin;
import it.tidalwave.bluemarine2.downloader.DownloadRequest;
import lombok.Cleanup;
import lombok.extern.slf4j.Slf4j;
import static it.tidalwave.bluemarine2.downloader.PropertyNames.CACHE_FOLDER_PATH;
import org.apache.http.HttpResponseInterceptor;
/***********************************************************************************************************************
*
* @author Fabrizio Giudici
* @version $Id$
*
**********************************************************************************************************************/
@SimpleMessageSubscriber @Slf4j
public class DefaultDownloader
{
    /** Upper bound on the number of 303 redirects followed for a single request; guards against redirect loops. */
    private static final int MAX_REDIRECTS = 20;

    /** HTTP status code "200 OK". */
    private static final int STATUS_OK = 200;

    /** HTTP status code "303 See Other". */
    private static final int STATUS_SEE_OTHER = 303;

    /** Name of the response header carrying the redirect target. */
    private static final String HEADER_LOCATION = "Location";

    /** The bus on which {@link DownloadComplete} messages are published. */
    @Inject
    private MessageBus messageBus;

    /** The storage backing the HTTP cache; its folder is configured in {@link #onPowerOnNotification}. */
    @Inject
    private SimpleHttpCacheStorage cacheStorage;

    private PoolingHttpClientConnectionManager connectionManager;

    private CacheConfig cacheConfig;

    private CloseableHttpClient httpClient;

    /*******************************************************************************************************************
     *
     * A response interceptor that strips the {@code Expires}, {@code Pragma} and {@code Cache-Control} headers from
     * every response and replaces them with a single {@code Expires} header set at the end of 2099 - presumably so
     * that the caching client never considers a response uncacheable or stale.
     *
     ******************************************************************************************************************/
    private final HttpResponseInterceptor killCacheHeaders = (HttpResponse response, HttpContext context) ->
    {
        response.removeHeaders("Expires");
        response.removeHeaders("Pragma");
        response.removeHeaders("Cache-Control");
        response.addHeader("Expires", "Mon, 31 Dec 2099 00:00:00 GMT");
    };

    /*******************************************************************************************************************
     *
     * A redirect strategy that never follows redirects: {@link #isRedirected} always answers {@code false}.
     *
     * FIXME: redirects are disabled in the HTTP client because onDownloadRequest() follows them by itself and
     * explicitly saves each response in the cache - see below.
     *
     ******************************************************************************************************************/
    private final RedirectStrategy dontFollowRedirect = new RedirectStrategy()
    {
        @Override
        public boolean isRedirected (HttpRequest request, HttpResponse response, HttpContext context)
          throws ProtocolException
        {
            return false;
        }

        @Override
        public HttpUriRequest getRedirect (HttpRequest request, HttpResponse response, HttpContext context)
          throws ProtocolException
        {
            return null;
        }
    };

    /*******************************************************************************************************************
     *
     * Creates the pooled connection manager, the cache configuration and the caching HTTP client used by this
     * component. The client uses the injected cache storage, doesn't follow redirects by itself, asks for
     * {@code application/n3} by default and has the cache headers of every response rewritten by
     * {@link #killCacheHeaders}.
     *
     ******************************************************************************************************************/
    @PostConstruct
    /* VisibleForTesting */ void initialize()
    {
        connectionManager = new PoolingHttpClientConnectionManager();
        connectionManager.setMaxTotal(200);
        connectionManager.setDefaultMaxPerRoute(20);

        cacheConfig = CacheConfig.custom()
                                 .setAllow303Caching(true)
                                 .setMaxCacheEntries(Integer.MAX_VALUE)
                                 .setMaxObjectSize(Integer.MAX_VALUE)
                                 .setSharedCache(false)
                                 .setHeuristicCachingEnabled(true)
                                 .build();

        // NOTE(review): the address in the user agent looks like an e-mail obfuscation artifact - confirm.
        httpClient = CachingHttpClients.custom()
                                       .setHttpCacheStorage(cacheStorage)
                                       .setCacheConfig(cacheConfig)
                                       .setRedirectStrategy(dontFollowRedirect)
                                       .setUserAgent("blueMarine ([email protected])")
                                       .setDefaultHeaders(Arrays.asList(new BasicHeader("Accept", "application/n3")))
                                       .setConnectionManager(connectionManager)
                                       .addInterceptorFirst(killCacheHeaders) // FIXME: only if explicitly configured
                                       .build();
    }

    /*******************************************************************************************************************
     *
     * Configures the folder of the cache storage with the {@code CACHE_FOLDER_PATH} property carried by the
     * notification.
     *
     * @param   notification        the power-on notification
     * @throws  NotFoundException   presumably when the property is not present in the notification - to be confirmed
     *
     ******************************************************************************************************************/
    /* VisibleForTesting */ void onPowerOnNotification (final @ListensTo @Nonnull PowerOnNotification notification)
      throws NotFoundException
    {
        log.info("onPowerOnNotification({})", notification);
        cacheStorage.setFolderPath(notification.getProperties().get(CACHE_FOLDER_PATH));
    }

    /*******************************************************************************************************************
     *
     * Downloads the requested URL and publishes a {@link DownloadComplete} message carrying the status code, the
     * body and the origin (cache or network) of the response. Responses with status 200 or 303 that didn't come from
     * the cache are explicitly stored into the cache storage.
     *
     * When the request carries the {@code FOLLOW_REDIRECT} option, 303 responses are followed through their
     * {@code Location} header (resolved against the current URL, so relative locations work), up to
     * {@link #MAX_REDIRECTS} times. A 303 response without a {@code Location} header is delivered as it is.
     *
     * In case of I/O error, including too many redirects, a {@link DownloadComplete} with status {@code -1} and an
     * empty body is published.
     *
     * @param   request             the download request
     * @throws  URISyntaxException  if a URL to download can't be converted to a URI
     *
     ******************************************************************************************************************/
    /* VisibleForTesting */ void onDownloadRequest (final @ListensTo @Nonnull DownloadRequest request)
      throws URISyntaxException
    {
        try
        {
            log.info("onDownloadRequest({})", request);
            URL url = request.getUrl();
            int redirectCount = 0;

            for (;;)
            {
                final HttpCacheContext context = HttpCacheContext.create();
                @Cleanup final CloseableHttpResponse response = httpClient.execute(new HttpGet(url.toURI()), context);
                final byte[] bytes = bytesFrom(response);
                final int statusCode = response.getStatusLine().getStatusCode();
                final CacheResponseStatus cacheResponseStatus = context.getCacheResponseStatus();
                log.debug(">>>> cacheResponseStatus: {}", cacheResponseStatus);

                // Identity comparison is safe on enums and doesn't fail if the context carries no cache status.
                final Origin origin = (cacheResponseStatus == CacheResponseStatus.CACHE_HIT) ? Origin.CACHE
                                                                                             : Origin.NETWORK;

                // FIXME: shouldn't do this by myself
                // FIXME: upon configuration, everything should be cached (needed for supporting integration tests)
                if (!origin.equals(Origin.CACHE) && ((statusCode == STATUS_OK) || (statusCode == STATUS_SEE_OTHER)))
                {
                    final Date date = new Date();
                    final Resource resource = new HeapResource(bytes);
                    cacheStorage.putEntry(url.toExternalForm(),
                            new HttpCacheEntry(date, date, response.getStatusLine(), response.getAllHeaders(), resource));
                }

                // FIXME: if the redirect were enabled, we could drop this check
                if (request.isOptionPresent(DownloadRequest.Option.FOLLOW_REDIRECT)
                    && (statusCode == STATUS_SEE_OTHER) // SEE_OTHER FIXME
                    && response.containsHeader(HEADER_LOCATION)) // nothing to follow without a target
                {
                    if (++redirectCount > MAX_REDIRECTS)
                    {
                        // Handled by the catch below, so the requestor still gets a (failed) completion message.
                        throw new IOException("Too many redirects (more than " + MAX_REDIRECTS + ")");
                    }

                    // The two-args constructor also resolves relative locations against the current URL.
                    url = new URL(url, response.getFirstHeader(HEADER_LOCATION).getValue());
                    log.info(">>>> following 'see also' to {} ...", url);
                }
                else
                {
                    messageBus.publish(new DownloadComplete(request.getUrl(), statusCode, bytes, origin));
                    return;
                }
            }
        }
        catch (IOException e)
        {
            log.error("{}: {}", request.getUrl(), e.toString());
            messageBus.publish(new DownloadComplete(request.getUrl(), -1, new byte[0], Origin.NETWORK));
        }
    }

    /*******************************************************************************************************************
     *
     * Reads the whole body of a response into memory.
     *
     * @param   response        the response
     * @return                  the bytes of the body; an empty array if the response has no entity
     * @throws  IOException     if the body can't be read
     *
     ******************************************************************************************************************/
    @Nonnull
    private byte[] bytesFrom (final @Nonnull HttpResponse response)
      throws IOException
    {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();

        if (response.getEntity() != null)
        {
            response.getEntity().writeTo(baos);
        }

        return baos.toByteArray();
    }
}