
com.jaeksoft.searchlib.crawler.cache.CrawlCacheManager Maven / Gradle / Ivy
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2012-2014 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.crawler.cache;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.InvalidPropertiesFormatException;
import java.util.Properties;
import org.json.JSONException;
import com.jaeksoft.searchlib.Logging;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.config.Config;
import com.jaeksoft.searchlib.crawler.web.spider.DownloadItem;
import com.jaeksoft.searchlib.util.PropertiesUtils;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.web.StartStopListener;
/**
 * Manages the crawl cache: holds the cache settings (enabled flag, expiration,
 * provider type, provider configuration), persists them to an XML properties
 * file, and delegates store/load/flush operations to the current
 * {@link CrawlCacheProvider}.
 * <p>
 * Instance state is accessed under the instance read/write lock ({@code rwl}):
 * getters and cache operations take the read lock, setters and {@link #close()}
 * take the write lock.
 */
public class CrawlCacheManager implements Closeable {

    /** Name of the XML properties file, resolved against the configuration directory. */
    private final static String CRAWLCACHE_PROPERTY_FILE = "crawlCache.xml";

    // Keys used in the properties file.
    private final static String CRAWLCACHE_PROPERTY_ENABLED = "enabled";
    private final static String CRAWLCACHE_PROPERTY_EXPIRATION_VALUE = "expirationValue";
    private final static String CRAWLCACHE_PROPERTY_EXPIRATION_UNIT = "expirationUnit";
    private final static String CRAWLCACHE_PROPERTY_PROVIDER_TYPE = "provider";
    private final static String CRAWLCACHE_PROPERTY_CONFIGURATION = "configuration";

    /** Units recognised by {@link #getExpirationDate()}; anything else is treated as days. */
    private final static String[] expirationUnitValues = { "days", "hours",
            "minutes" };

    /** Lazily created global instance, guarded by {@link #rwlInstance}. */
    private static CrawlCacheManager INSTANCE = null;

    final private static ReadWriteLock rwlInstance = new ReadWriteLock();

    /** Guards every mutable instance field below. */
    private final ReadWriteLock rwl = new ReadWriteLock();

    private CrawlCacheProvider crawlCache;

    private CrawlCacheProviderEnum crawlCacheProvider;

    private boolean enabled;

    private long expirationValue;

    private String expirationUnit;

    private String configuration;

    private final File propFile;

    /**
     * Loads the crawl cache settings from {@code crawlCache.xml} in the given
     * directory, instantiates the configured provider and, if the stored
     * settings say so, enables the cache.
     * <p>
     * A failure while enabling the cache is logged and does not abort the
     * construction; in that case the manager reports itself as disabled.
     *
     * @param confDir
     *            the directory containing the properties file
     * @throws InstantiationException
     *             if the provider cannot be instantiated
     * @throws IllegalAccessException
     *             if the provider cannot be instantiated
     * @throws IOException
     *             if the properties file cannot be read
     */
    public CrawlCacheManager(File confDir) throws InstantiationException,
            IllegalAccessException, IOException {
        propFile = new File(confDir, CRAWLCACHE_PROPERTY_FILE);
        Properties properties = PropertiesUtils.loadFromXml(propFile);
        boolean enabledProperty = "true".equals(properties.getProperty(
                CRAWLCACHE_PROPERTY_ENABLED, "false"));
        // The value is written with Long.toString, so it must be read back as
        // a long (Integer.parseInt rejected values above Integer.MAX_VALUE).
        expirationValue = Long.parseLong(properties.getProperty(
                CRAWLCACHE_PROPERTY_EXPIRATION_VALUE, "0"));
        expirationUnit = properties.getProperty(
                CRAWLCACHE_PROPERTY_EXPIRATION_UNIT, "days");
        // NOTE(review): assumes find() never returns null, even when the
        // property is missing - confirm against CrawlCacheProviderEnum.
        crawlCacheProvider = CrawlCacheProviderEnum.find(properties
                .getProperty(CRAWLCACHE_PROPERTY_PROVIDER_TYPE));
        configuration = properties
                .getProperty(CRAWLCACHE_PROPERTY_CONFIGURATION);
        crawlCache = crawlCacheProvider.getNewInstance();
        // Start disabled: setEnabled() only flips the flag once the provider
        // has been initialised, so a failed init leaves a consistent state.
        enabled = false;
        try {
            setEnabled(enabledProperty);
        } catch (IOException e) {
            Logging.warn("Enabling the crawl cache failed.", e);
        }
    }

    /**
     * Returns the global crawl cache manager, creating it on first use from
     * {@code StartStopListener.OPENSEARCHSERVER_DATA_FILE}.
     *
     * @return the shared global instance
     * @throws SearchLibException
     *             if the instance cannot be created
     */
    public static final CrawlCacheManager getGlobalInstance()
            throws SearchLibException {
        // Fast path: the instance already exists.
        rwlInstance.r.lock();
        try {
            if (INSTANCE != null) {
                return INSTANCE;
            }
        } finally {
            rwlInstance.r.unlock();
        }
        rwlInstance.w.lock();
        try {
            // Re-check: another thread may have created it in between.
            if (INSTANCE != null) {
                return INSTANCE;
            }
            INSTANCE = new CrawlCacheManager(
                    StartStopListener.OPENSEARCHSERVER_DATA_FILE);
            return INSTANCE;
        } catch (InvalidPropertiesFormatException e) {
            throw new SearchLibException(e);
        } catch (IOException e) {
            throw new SearchLibException(e);
        } catch (InstantiationException e) {
            throw new SearchLibException(e);
        } catch (IllegalAccessException e) {
            throw new SearchLibException(e);
        } finally {
            rwlInstance.w.unlock();
        }
    }

    /**
     * Returns the crawl cache manager attached to the given configuration if
     * it is enabled, otherwise the global instance.
     *
     * @param config
     *            the configuration providing its own crawl cache manager
     * @return the manager to use
     * @throws SearchLibException
     *             if the global instance is needed and cannot be created
     */
    public static final CrawlCacheManager getInstance(Config config)
            throws SearchLibException {
        CrawlCacheManager localManager = config.getCrawlCacheManager();
        if (localManager.isEnabled()) {
            return localManager;
        }
        return getGlobalInstance();
    }

    /**
     * Writes the current settings to the properties file. Callers hold the
     * write lock.
     *
     * @throws IOException
     *             if the file cannot be written
     */
    private void save() throws IOException {
        Properties properties = new Properties();
        properties.setProperty(CRAWLCACHE_PROPERTY_ENABLED,
                Boolean.toString(enabled));
        properties.setProperty(CRAWLCACHE_PROPERTY_EXPIRATION_VALUE,
                Long.toString(expirationValue));
        // Properties rejects null values; an absent unit is read back as the
        // "days" default on the next load.
        if (expirationUnit != null) {
            properties.setProperty(CRAWLCACHE_PROPERTY_EXPIRATION_UNIT,
                    expirationUnit);
        }
        properties.setProperty(CRAWLCACHE_PROPERTY_PROVIDER_TYPE,
                crawlCacheProvider.name());
        if (configuration != null) {
            properties.setProperty(CRAWLCACHE_PROPERTY_CONFIGURATION,
                    configuration);
        }
        PropertiesUtils.storeToXml(properties, propFile);
    }

    /**
     * Closes the current cache provider, if any.
     */
    @Override
    public void close() {
        rwl.w.lock();
        try {
            if (crawlCache != null) {
                crawlCache.close();
            }
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * @return the information string reported by the cache provider
     * @throws IOException
     *             if the provider fails
     */
    public String getInfos() throws IOException {
        rwl.r.lock();
        try {
            return crawlCache.getInfos();
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * @return the configuration information reported by the cache provider
     * @throws IOException
     *             if the provider fails
     */
    public String getConfigurationInformation() throws IOException {
        rwl.r.lock();
        try {
            return crawlCache.getConfigurationInformation();
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Stores the downloaded item in the cache. When the cache is disabled the
     * item is not stored and its own content stream is returned instead.
     *
     * @param downloadItem
     *            the item to store
     * @return the stream returned by the provider, or the item's content
     *         stream when the cache is disabled
     * @throws IOException
     *             if the provider fails
     * @throws JSONException
     *             if the provider fails
     */
    public InputStream storeCache(DownloadItem downloadItem)
            throws IOException, JSONException {
        rwl.r.lock();
        try {
            if (enabled) {
                return crawlCache.store(downloadItem);
            }
            return downloadItem.getContentInputStream();
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Computes the expiration threshold: the current time minus the
     * configured expiration delay, in milliseconds.
     *
     * @return the threshold timestamp, or 0 when the expiration value is 0
     */
    private long getExpirationDate() {
        if (expirationValue == 0) {
            return 0;
        }
        final long millisPerUnit;
        if ("hours".equalsIgnoreCase(expirationUnit)) {
            millisPerUnit = 1000L * 3600;
        } else if ("minutes".equalsIgnoreCase(expirationUnit)) {
            millisPerUnit = 1000L * 60;
        } else {
            // Default is days
            millisPerUnit = 1000L * 86400;
        }
        long delay = expirationValue * millisPerUnit;
        if (Logging.isDebug) {
            Logging.debug("ExpirationDate l = " + delay);
        }
        return System.currentTimeMillis() - delay;
    }

    /**
     * Loads the cached item for the given URI, passing the current expiration
     * threshold to the provider.
     *
     * @param uri
     *            the URI to look up
     * @return the item returned by the provider, or {@code null} when the
     *         cache is disabled
     * @throws IOException
     *             if the provider fails
     * @throws JSONException
     *             if the provider fails
     * @throws URISyntaxException
     *             if the provider fails
     */
    public DownloadItem loadCache(URI uri) throws IOException, JSONException,
            URISyntaxException {
        rwl.r.lock();
        try {
            if (!enabled) {
                return null;
            }
            return crawlCache.load(uri, getExpirationDate());
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Flushes the cache entry of the given URI.
     *
     * @param uri
     *            the URI to flush
     * @return the provider's result, or {@code false} when the cache is
     *         disabled
     * @throws IOException
     *             if the provider fails
     */
    public boolean flushCache(URI uri) throws IOException {
        rwl.r.lock();
        try {
            if (!enabled) {
                return false;
            }
            return crawlCache.flush(uri);
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Flushes the cache using a timestamp threshold: the expiration threshold
     * when {@code expiration} is true, the current time otherwise.
     *
     * @param expiration
     *            true to use the configured expiration threshold
     * @return the value returned by the provider
     * @throws IOException
     *             if the provider fails
     */
    public long flushCache(boolean expiration) throws IOException {
        rwl.r.lock();
        try {
            // NOTE(review): unlike flushCache(URI), this does not check the
            // enabled flag - confirm whether that is intentional.
            long threshold = expiration ? getExpirationDate() : System
                    .currentTimeMillis();
            return crawlCache.flush(threshold);
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * @return the enabled
     */
    public boolean isEnabled() {
        rwl.r.lock();
        try {
            return enabled;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * @return true when the cache is not enabled
     */
    public boolean isDisabled() {
        return !isEnabled();
    }

    /**
     * Enables or disables the cache. Enabling initialises the provider with
     * the current configuration; disabling closes it. The flag is updated and
     * the settings are saved only once the provider call has succeeded.
     *
     * @param enabled
     *            the enabled to set
     * @throws IOException
     *             if the provider initialisation or the save fails
     */
    public void setEnabled(boolean enabled) throws IOException {
        rwl.w.lock();
        try {
            if (enabled) {
                crawlCache.init(configuration);
            } else {
                crawlCache.close();
            }
            this.enabled = enabled;
            save();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * @return the expirationValue
     */
    public long getExpirationValue() {
        rwl.r.lock();
        try {
            return expirationValue;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * @return true when an expiration is configured (the expiration threshold
     *         is not 0)
     */
    public boolean isExpiration() {
        rwl.r.lock();
        try {
            return getExpirationDate() != 0;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Sets the expiration value and saves the settings.
     *
     * @param expirationValue
     *            the expirationValue to set
     * @throws IOException
     *             if the settings cannot be saved
     */
    public void setExpirationValue(long expirationValue) throws IOException {
        rwl.w.lock();
        try {
            this.expirationValue = expirationValue;
            save();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * @return a copy of the supported expiration units
     */
    public String[] getExpirationUnitValues() {
        // Defensive copy: callers must not be able to alter the shared array.
        return expirationUnitValues.clone();
    }

    /**
     * @return the expirationUnit
     */
    public String getExpirationUnit() {
        rwl.r.lock();
        try {
            return expirationUnit;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Sets the expiration unit and saves the settings.
     *
     * @param expirationUnit
     *            the expirationUnit to set
     * @throws IOException
     *             if the settings cannot be saved
     */
    public void setExpirationUnit(String expirationUnit) throws IOException {
        rwl.w.lock();
        try {
            this.expirationUnit = expirationUnit;
            save();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * @return the crawlCacheProvider
     */
    public CrawlCacheProviderEnum getCrawlCacheProvider() {
        rwl.r.lock();
        try {
            return crawlCacheProvider;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Switches to another provider type and saves the settings. Only allowed
     * while the cache is disabled. Nothing happens when the given provider is
     * already the current one.
     *
     * @param crawlCacheProvider
     *            the crawlCacheProvider to set
     * @throws SearchLibException
     *             if the cache is currently enabled
     * @throws IllegalAccessException
     *             if the new provider cannot be instantiated
     * @throws InstantiationException
     *             if the new provider cannot be instantiated
     * @throws IOException
     *             if the settings cannot be saved
     */
    public void setCrawlCacheProvider(CrawlCacheProviderEnum crawlCacheProvider)
            throws SearchLibException, InstantiationException,
            IllegalAccessException, IOException {
        rwl.w.lock();
        try {
            if (enabled) {
                throw new SearchLibException("Crawl cache is running");
            }
            if (this.crawlCacheProvider == crawlCacheProvider) {
                return;
            }
            // Instantiate first so a failure leaves both fields untouched.
            CrawlCacheProvider newCrawlCache = crawlCacheProvider
                    .getNewInstance();
            this.crawlCacheProvider = crawlCacheProvider;
            this.crawlCache = newCrawlCache;
            save();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * @return the configuration
     */
    public String getConfiguration() {
        rwl.r.lock();
        try {
            return configuration;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Sets the provider configuration and saves the settings. Only allowed
     * while the cache is disabled.
     *
     * @param configuration
     *            the configuration to set
     * @throws SearchLibException
     *             if the cache is currently enabled
     * @throws IOException
     *             if the settings cannot be saved
     */
    public void setConfiguration(String configuration)
            throws SearchLibException, IOException {
        rwl.w.lock();
        try {
            if (enabled) {
                throw new SearchLibException("Crawl cache is running");
            }
            this.configuration = configuration;
            save();
        } finally {
            rwl.w.unlock();
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy