com.jaeksoft.searchlib.crawler.web.screenshot.ScreenshotManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you can quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
The newest version!
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2011-2013 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.crawler.web.screenshot;
import java.awt.Dimension;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.NoSuchAlgorithmException;
import java.util.List;
import javax.imageio.ImageIO;
import com.jaeksoft.searchlib.ClientFactory;
import com.jaeksoft.searchlib.Logging;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.config.Config;
import com.jaeksoft.searchlib.crawler.web.browser.BrowserDriverEnum;
import com.jaeksoft.searchlib.crawler.web.database.CredentialItem;
import com.jaeksoft.searchlib.crawler.web.database.WebPropertyManager;
import com.jaeksoft.searchlib.util.LastModifiedAndSize;
import com.jaeksoft.searchlib.util.LinkUtils;
import com.jaeksoft.searchlib.util.Md5Spliter;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.util.SimpleLock;
import com.jaeksoft.searchlib.util.properties.PropertyItem;
import com.jaeksoft.searchlib.util.properties.PropertyItemListener;
public class ScreenshotManager implements PropertyItemListener {
private File screenshotDir;
private Config config;
private final ReadWriteLock rwl = new ReadWriteLock();
private final SimpleLock captureLock = new SimpleLock();
private ScreenshotMethodEnum screenshotMethodEnum;
private Dimension captureDimension;
private Dimension resizeDimension;
private BrowserDriverEnum browserDriverEnum;
public ScreenshotManager(Config config) throws SearchLibException, IOException {
this.config = config;
screenshotDir = new File(config.getDirectory(), "screenshot");
if (!screenshotDir.exists())
screenshotDir.mkdir();
BrowserDriverEnum defaultBrowserDriverEnum = BrowserDriverEnum
.find(ClientFactory.INSTANCE.getDefaultWebBrowserDriver().getValue(), BrowserDriverEnum.FIREFOX);
WebPropertyManager props = config.getWebPropertyManager();
browserDriverEnum = BrowserDriverEnum.find(props.getScreenshotBrowser().getValue(), defaultBrowserDriverEnum);
screenshotMethodEnum = ScreenshotMethodEnum.find(props.getScreenshotMethod().getValue());
captureDimension = getCaptureDimension(props);
resizeDimension = getResizeDimension(props);
props.getScreenshotCaptureHeight().addListener(this);
props.getScreenshotCaptureWidth().addListener(this);
props.getScreenshotResizeHeight().addListener(this);
props.getScreenshotResizeWidth().addListener(this);
props.getScreenshotMethod().addListener(this);
props.getScreenshotBrowser().addListener(this);
}
private final File buildFile(URL url) throws SearchLibException {
try {
String md5host = Md5Spliter.getMD5Hash(url.getHost());
File dirPath = new File(screenshotDir, md5host.substring(0, 1) + File.separator + md5host.substring(1, 2));
return new File(dirPath, Md5Spliter.getMD5Hash(url.toExternalForm()) + ".png");
} catch (NoSuchAlgorithmException e) {
throw new SearchLibException(e);
} catch (UnsupportedEncodingException e) {
throw new SearchLibException(e);
}
}
public final File getPngFile(URL url) throws SearchLibException {
rwl.r.lock();
try {
File file = buildFile(url);
return file.exists() ? file : null;
} finally {
rwl.r.unlock();
}
}
public final BufferedImage getImage(URL url) throws SearchLibException, IOException {
rwl.r.lock();
try {
File file = buildFile(url);
return file.exists() ? ImageIO.read(file) : null;
} finally {
rwl.r.unlock();
}
}
private static BrowserDriverEnum getBrowser(WebPropertyManager props) {
return BrowserDriverEnum.find(props.getScreenshotBrowser().getValue(), BrowserDriverEnum.FIREFOX);
}
private static ScreenshotMethodEnum getMethod(WebPropertyManager props) {
return ScreenshotMethodEnum.find(props.getScreenshotMethod().getValue());
}
public ScreenshotMethodEnum getMethod() throws SearchLibException {
rwl.r.lock();
try {
return screenshotMethodEnum;
} finally {
rwl.r.unlock();
}
}
private Dimension getCaptureDimension(WebPropertyManager props) {
Dimension dimension = new Dimension(props.getScreenshotCaptureWidth().getValue(),
props.getScreenshotCaptureHeight().getValue());
return dimension;
}
public Dimension getCaptureDimension() {
rwl.r.lock();
try {
return captureDimension;
} finally {
rwl.r.unlock();
}
}
private Dimension getResizeDimension(WebPropertyManager props) {
Dimension dimension = new Dimension(props.getScreenshotResizeWidth().getValue(),
props.getScreenshotResizeHeight().getValue());
return dimension;
}
public Dimension getResizeDimension() {
rwl.r.lock();
try {
return resizeDimension;
} finally {
rwl.r.unlock();
}
}
public void setMethod(ScreenshotMethodEnum method) throws IOException, SearchLibException {
rwl.w.lock();
try {
config.getWebPropertyManager().getScreenshotMethod().setValue(method.name());
} finally {
rwl.w.unlock();
}
}
@Override
public void hasBeenSet(PropertyItem> prop) throws IOException {
rwl.r.lock();
try {
WebPropertyManager props = config.getWebPropertyManager();
if (prop == props.getScreenshotCaptureHeight() || prop == props.getScreenshotCaptureWidth())
captureDimension = getCaptureDimension(props);
else if (prop == props.getScreenshotResizeHeight() || prop == props.getScreenshotResizeWidth())
resizeDimension = getResizeDimension(props);
else if (prop == props.getScreenshotMethod())
screenshotMethodEnum = getMethod(props);
else if (prop == props.getScreenshotBrowser())
browserDriverEnum = getBrowser(props);
} finally {
rwl.r.unlock();
}
}
public ScreenshotThread capture(URL url, CredentialItem credentialItem, boolean waitForEnd, int secTimeOut)
throws SearchLibException {
rwl.r.lock();
try {
if (!screenshotMethodEnum.doScreenshot(url))
return null;
captureLock.rl.lock();
try {
ScreenshotThread thread = new ScreenshotThread(config, this, url, credentialItem, browserDriverEnum);
thread.execute(180);
if (waitForEnd)
thread.waitForEnd(secTimeOut);
return thread;
} finally {
captureLock.rl.unlock();
}
} finally {
rwl.r.unlock();
}
}
public void store(URL url, BufferedImage image) throws SearchLibException, IOException {
rwl.r.lock();
try {
File file = buildFile(url);
File parentDir = file.getParentFile();
if (!parentDir.exists())
parentDir.mkdirs();
ImageIO.write(image, "png", file);
} finally {
rwl.r.unlock();
}
}
public void delete(URL url) throws SearchLibException {
rwl.r.lock();
try {
getPngFile(url).delete();
} finally {
rwl.r.unlock();
}
}
public void delete(List urlList) throws SearchLibException {
rwl.r.lock();
try {
for (String u : urlList) {
try {
URL url = LinkUtils.newEncodedURL(u);
File f = getPngFile(url);
if (f != null)
getPngFile(url).delete();
} catch (MalformedURLException e) {
Logging.warn(e);
} catch (URISyntaxException e) {
Logging.warn(e);
}
}
} finally {
rwl.r.unlock();
}
}
private static final void purge(File directory, long timeLimit) {
for (File file : directory.listFiles()) {
if (file.isDirectory()) {
purge(file, timeLimit);
} else {
if (file.lastModified() < timeLimit)
file.delete();
}
}
if (directory.list().length == 0)
directory.delete();
}
public void purgeOldFiles(long timeLimit) {
purge(screenshotDir, timeLimit);
}
public LastModifiedAndSize getInfos() throws SearchLibException {
rwl.r.lock();
try {
return new LastModifiedAndSize(screenshotDir, true);
} finally {
rwl.r.unlock();
}
}
/**
* @return the browserDriverEnum
*/
public BrowserDriverEnum getBrowserDriver() {
rwl.r.lock();
try {
return browserDriverEnum;
} finally {
rwl.r.unlock();
}
}
/**
* @param browserDriverEnum
* the browserDriverEnum to set
* @throws SearchLibException
* @throws IOException
*/
public void setBrowserDriver(BrowserDriverEnum browserDriverEnum) throws IOException, SearchLibException {
rwl.w.lock();
try {
config.getWebPropertyManager().getScreenshotBrowser().setValue(browserDriverEnum.name());
} finally {
rwl.w.unlock();
}
}
}