
org.scijava.util.MirrorWebsite Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of scijava-common Show documentation
Show all versions of scijava-common Show documentation
SciJava Common is a shared library for SciJava software. It provides a plugin framework, with an extensible mechanism for service discovery, backed by its own annotation processor, so that plugins can be loaded dynamically. It is used by both ImageJ and SCIFIO.
/*
* #%L
* SciJava Common shared library for SciJava software.
* %%
* Copyright (C) 2009 - 2016 Board of Regents of the University of
* Wisconsin-Madison, Broad Institute of MIT and Harvard, and Max Planck
* Institute of Molecular Cell Biology and Genetics.
* %%
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* #L%
*/
package org.scijava.util;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
* This program mirrors a given website.
*
* Its primary purpose is to provide the code necessary to keep ImageJ Mirror up-to-date.
*
*
* @author Johannes Schindelin
*/
public class MirrorWebsite {
/** Thread count constant; presumably the default used by callers (not referenced in the visible code). */
public final static int THREAD_COUNT = 20;
/**
 * Delay constant; presumably the default per-job delay.
 * NOTE(review): the per-job delay is handed to Thread.sleep(long), which
 * takes milliseconds, so the unit in this name looks wrong -- confirm.
 */
public final static long DELAY_IN_MICROSECONDS = 0;
/** The URL of the site to mirror; the constructor guarantees a trailing slash. */
private String baseURL;
/** The local directory for a file: baseURL, otherwise null. */
private String basePath;
/** Directory below which the mirrored files are stored. */
private File localDirectory;
/** Maps each normalized link to a space-separated list of the pages linking to it. */
private Map<String, String> linkMap = new HashMap<String, String>();
/** Paths that could not be found, in order of discovery. */
private Set<String> missingLinks = new LinkedHashSet<String>();
/** Pool running the mirror jobs; created by run(). */
private ExecutorService executorService;
/** Every job scheduled so far, keyed by path; null until run() was called. */
private Map<String, MirrorJob> jobs;
/** Paths whose jobs have finished; guarded by this object's monitor. */
private Set<String> done;
/** Number of threads in the pool created by run(). */
private int threadCount;
/** Pause after each job, passed unchanged to Thread.sleep(long). */
private long delay;
/**
 * Prepares a mirroring task; nothing is fetched until {@link #run()} is called.
 *
 * @param baseURL the URL of the site to mirror; a trailing slash is appended
 *          if missing
 * @param localDirectory the directory the mirrored files are written to
 * @param threadCount the number of worker threads to use
 * @param delay the pause after each job, as passed to Thread.sleep(long)
 */
public MirrorWebsite(final String baseURL, final File localDirectory,
	final int threadCount, final long delay) {
	this.baseURL = baseURL + (baseURL.endsWith("/") ? "" : "/");
	// Derive the local path from the *normalized* URL: relative paths are later
	// appended by plain string concatenation, so it must end in a slash, too.
	this.basePath = this.baseURL.startsWith("file:") ? this.baseURL.substring(5) : null;
	this.localDirectory = localDirectory;
	this.threadCount = threadCount;
	this.delay = delay;
}
/**
 * Mirrors the website, starting at index.html, and blocks until the thread
 * pool has terminated.
 *
 * @throws InterruptedException if the wait for the pool is interrupted
 * @throws RuntimeException if this instance was already started
 */
public void run() throws InterruptedException {
	synchronized (this) {
		final boolean alreadyStarted = jobs != null;
		if (alreadyStarted) {
			throw new RuntimeException("Mirroring already in progress!");
		}
		executorService = Executors.newFixedThreadPool(threadCount);
		jobs = new LinkedHashMap();
		done = new TreeSet();
		// seed the crawl; every further job is scheduled by the jobs themselves
		mirror("index.html");
	}
	// wait outside the monitor: the jobs need it to register their completion
	final long forever = Long.MAX_VALUE;
	executorService.awaitTermination(forever, TimeUnit.SECONDS);
}
/**
 * Schedules the given path for mirroring unless a job for it has been
 * registered before.
 *
 * @param path the path to mirror, relative to the base URL
 */
public void mirror(String path) {
	final MirrorJob job;
	synchronized (this) {
		if (jobs.containsKey(path)) return;
		job = new MirrorJob(path);
		jobs.put(path, job);
	}
	try {
		executorService.execute(job);
	} catch (Throwable t) {
		t.printStackTrace();
		// The job will never run, so account for it here. The bookkeeping is
		// guarded by this object's monitor, exactly as in MirrorJob.run().
		synchronized (this) {
			done.add(path);
			// if this was the last outstanding job, nobody else would shut the
			// pool down and run() would wait forever
			if (done.size() == jobs.size()) executorService.shutdown();
		}
	}
}
/**
 * Asks the given URL for its last-modified timestamp.
 * <p>
 * For HTTP connections a HEAD request is used. The connection's input stream
 * is opened and closed right away, so a missing resource surfaces as an
 * exception from this method.
 * </p>
 *
 * @param url the absolute URL to query
 * @return the value reported by URLConnection.getLastModified()
 * @throws IOException if the URL cannot be opened
 */
private static long getRemoteTimestamp(String url) throws IOException {
	URLConnection conn;
	try {
		conn = new URL(url).openConnection();
	}
	catch (FileNotFoundException notFound) {
		if (!url.endsWith("/index.html")) {
			throw notFound;
		}
		// retry with the bare directory URL ("index.html" has 10 characters)
		final String directoryURL = url.substring(0, url.length() - 10);
		conn = new URL(directoryURL).openConnection();
	}
	if (conn instanceof HttpURLConnection) {
		final HttpURLConnection http = (HttpURLConnection) conn;
		http.setRequestMethod("HEAD");
	}
	conn.setUseCaches(false);
	final long timestamp = conn.getLastModified();
	final InputStream stream = conn.getInputStream();
	stream.close();
	return timestamp;
}
/**
 * Compares the local copy of a path with its remote timestamp.
 *
 * @param path the path to check, relative to the base URL
 * @return 0 if the local copy is up-to-date; otherwise the desired
 *         last-modified timestamp, or a negative value if the file needs to
 *         be downloaded but the remote timestamp is unknown
 * @throws IOException if the remote timestamp cannot be determined
 */
private long upToDate(String path) throws IOException {
	final long remote = getRemoteTimestamp(baseURL + path);
	final File file = new File(localDirectory, path);
	if (!file.exists()) {
		// A remote timestamp of 0 means "unknown", but 0 tells our caller that
		// the file is up-to-date. A missing file is never up-to-date, so signal
		// "download, timestamp unknown" with a negative value instead.
		return remote == 0 ? -1 : remote;
	}
	final long local = file.lastModified();
	return remote < 0 || local == remote ? 0 : remote;
}
/**
 * Extracts an attribute value from HTML source.
 * <p>
 * Leading spaces and line breaks are skipped. A value starting with a single
 * or double quote extends to the matching quote; any other value extends to
 * the next space or closing angle bracket. Without a terminator, the value
 * extends to the end of the text.
 * </p>
 *
 * @param html the HTML source
 * @param startOffset the offset just after the attribute's equals sign
 * @return the value without quotes; the empty string if fewer than two
 *         characters are left after skipping whitespace
 */
private String getValue(String html, int startOffset) {
	final int length = html.length();
	int begin = startOffset;
	for (; begin < length; begin++) {
		final char skipped = html.charAt(begin);
		if (skipped != '\n' && skipped != '\r' && skipped != ' ') break;
	}
	if (begin + 1 >= length) {
		return "";
	}
	final char first = html.charAt(begin);
	if (first == '"' || first == '\'') {
		// quoted: everything up to the matching quote
		begin++;
		final int closing = html.indexOf(first, begin);
		return closing < 0 ? html.substring(begin) : html.substring(begin, closing);
	}
	// unquoted: everything up to the next space or '>'
	for (int end = begin; end < length; end++) {
		final char c = html.charAt(end);
		if (c == ' ' || c == '>') {
			return html.substring(begin, end);
		}
	}
	return html.substring(begin);
}
/**
 * Records that one page links to another.
 * <p>
 * The normalized link is appended to the given list, and the linking page is
 * added to the link map entry of that link (a space-separated list without
 * duplicates). Links that cannot be normalized are ignored.
 * </p>
 *
 * @param result the list collecting the links of the current page
 * @param sourceURL the path of the page containing the link
 * @param url the link target, relative to the base URL
 */
private void addLinkRelation(List result, String sourceURL, String url) {
	final String target = normalizeURL(url);
	if (target != null) {
		result.add(target);
		synchronized (linkMap) {
			final String known = linkMap.get(target);
			if (known == null) {
				linkMap.put(target, sourceURL);
			}
			else {
				// pad with spaces so that only whole entries match
				final boolean listed = (" " + known + " ").contains(" " + sourceURL + " ");
				if (!listed) {
					linkMap.put(target, known + " " + sourceURL);
				}
			}
		}
	}
}
/**
 * Extracts the links of an HTML page that point into the mirrored site.
 * <p>
 * The values of all href and src attributes are inspected. Fragments, queries
 * and parameters are stripped, links to directories get "index.html"
 * appended, and server-absolute links are resolved against the host of the
 * base URL. Only links below the base URL are reported; each of them is also
 * registered via addLinkRelation.
 * </p>
 *
 * @param relativePath the directory of the page, relative to the base URL
 *          (empty, or ending in a slash)
 * @param path the path of the page itself, relative to the base URL
 * @param html the HTML source of the page
 * @return the normalized paths of the links, relative to the base URL
 */
private List<String> getLinks(String relativePath, String path, String html) {
	final String[] patterns = { " href=", " src=", " HREF=", " SRC=" };
	final char[] terminators = { '#', '?', ';' };
	final List<String> result = new ArrayList<String>();
	int offset = -1;
	for (;;) {
		// find the attribute closest to the current offset
		int newOffset = -1;
		for (final String pattern : patterns) {
			final int tmp = html.indexOf(pattern, offset + 1);
			if (tmp >= 0 && (newOffset < 0 || newOffset > tmp))
				newOffset = tmp + pattern.length();
		}
		if (newOffset < 0)
			break;
		offset = newOffset;
		String value = getValue(html, offset);
		offset += value.length();
		// pseudo-links which cannot be mirrored
		if (hasScheme(value, "mailto:") || hasScheme(value, "javascript:") || hasScheme(value, "data:"))
			continue;
		for (final char c : terminators) {
			final int cut = value.indexOf(c);
			if (cut >= 0)
				value = value.substring(0, cut);
		}
		if (value.endsWith("/"))
			value += "index.html";
		if (value.startsWith("/")) {
			// server-absolute link: prepend the scheme and host of the base URL
			final int colon = baseURL.indexOf("://");
			final int slash = baseURL.indexOf('/', colon + 3);
			value = baseURL.substring(0, slash) + value;
		}
		else if (value.indexOf("://") < 0) {
			// relative link
			if (!value.equals(""))
				addLinkRelation(result, path, relativePath + value);
			continue;
		}
		// absolute link: only follow it if it stays within the mirrored site
		if (value.startsWith(baseURL))
			addLinkRelation(result, path, value.substring(baseURL.length()));
	}
	return result;
}

/** Tests whether the value starts with the given scheme prefix, ignoring case. */
private static boolean hasScheme(final String value, final String scheme) {
	return value.regionMatches(true, 0, scheme, 0, scheme.length());
}
/**
 * Tests whether a path denotes an HTML page, judging by its extension.
 *
 * @param path the path to test
 * @return true if the path ends in .htm or .html, ignoring case
 */
private static boolean isHTML(String path) {
	final String name = path.toLowerCase();
	if (name.endsWith(".html")) {
		return true;
	}
	return name.endsWith(".htm");
}
/**
 * Copies a stream completely, into a string buffer and/or an output stream.
 * <p>
 * Both streams are closed when the method returns, even if copying fails.
 * </p>
 *
 * @param in the stream to read from
 * @param string if not null, the contents are appended to this buffer,
 *          decoded using the platform's default charset
 * @param out if not null, the contents are written to this stream
 * @throws IOException if reading, writing or closing fails
 */
private static void copyStream(InputStream in, StringBuffer string, OutputStream out) throws IOException {
	// Collect the raw bytes and decode them in one go: decoding each chunk
	// separately would corrupt multi-byte characters straddling a chunk boundary.
	final ByteArrayOutputStream text = string == null ? null : new ByteArrayOutputStream();
	try {
		final byte[] buffer = new byte[65536];
		for (;;) {
			final int count = in.read(buffer);
			if (count < 0)
				break;
			if (text != null)
				text.write(buffer, 0, count);
			if (out != null)
				out.write(buffer, 0, count);
		}
		if (text != null)
			string.append(text.toString());
	}
	finally {
		try {
			in.close();
		}
		finally {
			if (out != null)
				out.close();
		}
	}
}
/**
 * Makes sure that the local copy of the given path is up-to-date, downloading
 * it if necessary, and determines which paths to look at next.
 * <p>
 * HTML pages are rewritten while downloading so that absolute links into the
 * mirrored site become relative ones. The download goes to a temporary file
 * which is renamed into place afterwards.
 * </p>
 *
 * @param path the path to mirror, relative to the base URL
 * @return the paths linked from the page (or, for a local directory without
 *         index.html, the directory's entries); empty for non-HTML files
 * @throws IOException if the path cannot be read or stored
 */
private List<String> ensureUptodate(String path) throws IOException {
	final StringBuffer string = new StringBuffer();
	final String relativePath = path.substring(0, path.lastIndexOf('/') + 1);
	final File file = new File(localDirectory, path);

	// special-case local case: file:/.../ does not list the directory contents
	if (basePath != null && ("/" + path).endsWith("/index.html") && !new File(basePath + path).exists()) {
		// "index.html" has 10 characters
		final String directory = path.substring(0, path.length() - 10);
		final File[] list = new File(basePath + directory).listFiles();
		if (list == null) return Collections.<String>emptyList();
		final List<String> result = new ArrayList<String>();
		for (final File item : list) {
			if (item.isDirectory()) result.add(directory + item.getName() + "/index.html");
			else result.add(directory + item.getName());
		}
		return result;
	}

	long remoteLastModified;
	try {
		remoteLastModified = upToDate(path);
		if (remoteLastModified == 0) {
			// nothing to download; HTML pages are still scanned for links
			if (!isHTML(path))
				return Collections.<String>emptyList();
			copyStream(new FileInputStream(file), string, null);
			return getLinks(relativePath, path, string.toString());
		}
	} catch (FileNotFoundException e) {
		if (!path.endsWith("/index.html"))
			throw e;
		// download via the directory URL below, without a known timestamp
		remoteLastModified = -1;
	}

	InputStream in = null;
	try {
		in = new URL(baseURL + path).openStream();
	}
	catch (MalformedURLException e) {
		// report the offending URL, but keep the original exception as cause
		final MalformedURLException wrapped = new MalformedURLException(baseURL + path);
		wrapped.initCause(e);
		throw wrapped;
	}
	catch (FileNotFoundException e) {
		if (path.endsWith("/index.html"))
			in = new URL(baseURL + path.substring(0, path.length() - 10)).openStream();
		else
			throw e;
	}

	System.err.println("Downloading " + path);
	final File tmp = new File(localDirectory, path + ".download.tmp");
	try {
		tmp.getParentFile().mkdirs();
		final FileOutputStream out = new FileOutputStream(tmp);
		try {
			if (isHTML(path)) {
				copyStream(in, string, null);
				final String rewritten = relativizeLinks(path, string.toString());
				copyStream(new ByteArrayInputStream(rewritten.getBytes()), null, out);
			}
			else
				copyStream(in, null, out);
		}
		finally {
			// no-op after a successful copy; prevents a leak if copying failed
			out.close();
		}
	}
	finally {
		in.close();
	}

	// some platforms refuse to rename onto an existing file
	boolean moved = tmp.renameTo(file);
	if (!moved && file.delete())
		moved = tmp.renameTo(file);
	if (!moved)
		// do not touch the timestamp: a stale file must not look up-to-date
		System.err.println("Could not rename " + tmp + " to " + file);
	else if (remoteLastModified >= 0)
		file.setLastModified(remoteLastModified);

	if (!isHTML(path))
		return Collections.<String>emptyList();
	return getLinks(relativePath, path, string.toString());
}

/**
 * Rewrites absolute links into the mirrored site as links relative to the
 * directory of the given page. All replacements are literal, not regular
 * expressions.
 */
private String relativizeLinks(final String path, final String html) {
	String rewritten = html.replace("http://rsb.info.nih.gov", "http://imagej.nih.gov");
	String replacement = "", path2 = path;
	// walk from the page's directory up to the site root; each level further
	// up needs one more "../" in front of the relative link
	for (;;) {
		path2 = path2.substring(0, path2.lastIndexOf('/') + 1);
		rewritten = rewritten.replace(baseURL + path2, replacement);
		// special-case rewriting from a local mirror
		if (basePath != null) {
			rewritten = rewritten.replace("http://imagej.nih.gov/ij/" + path2, replacement);
		}
		if (path2.equals(""))
			break;
		// strip trailing slash
		path2 = path2.substring(0, path2.length() - 1);
		replacement = "../" + replacement;
	}
	return rewritten;
}
/**
 * Normalizes a path relative to the base URL by resolving "." and ".."
 * segments.
 *
 * @param originalPath the path to normalize
 * @return the normalized path, or null if the path points outside of the
 *         mirrored site
 */
private static String normalizeURL(String originalPath) {
	String path = originalPath;
	// a leading "./" refers to the same location as the path without it
	while (path.startsWith("./"))
		path = path.substring(2);
	for (;;) {
		final int dot = path.indexOf("/./");
		if (dot >= 0) {
			path = path.substring(0, dot) + path.substring(dot + 2);
			continue;
		}
		final int dotdot = path.indexOf("/../");
		if (dotdot < 0)
			break;
		if (dotdot == 0)
			return null;
		// the segment in front of "/../" starts after the previous slash, or
		// at the very beginning of the path if there is none (slash == -1)
		final int slash = path.lastIndexOf('/', dotdot - 1);
		final String parent = path.substring(slash + 1, dotdot);
		if (parent.equals(".."))
			return null; // "../../": escapes the mirrored site
		path = path.substring(0, slash + 1) + path.substring(dotdot + 4);
	}
	// Links leaving the site are ignored. Signal that like all other rejected
	// paths do: an exception would abort the link extraction of the whole page.
	if (path.startsWith("../"))
		return null;
	return path;
}
/**
 * Prints all paths recorded as missing to the standard error stream, each
 * followed by the pages linking to it where known. Prints nothing if no
 * path is missing.
 */
private void reportMissingLinks() {
	if (!missingLinks.isEmpty()) {
		System.err.println("Found broken links:");
		for (final String path : missingLinks) {
			final String source = linkMap.get(path);
			String origin = "";
			if (source != null) {
				origin = " (linked from " + source + ")";
			}
			System.err.println(path + origin);
		}
	}
}
/**
 * Mirrors a single path and schedules jobs for all paths it links to.
 * <p>
 * The job which finishes last shuts the thread pool down and reports the
 * broken links.
 * </p>
 */
private class MirrorJob implements Runnable {
	private final String path;

	public MirrorJob(String path) {
		this.path = path;
	}

	@Override
	public void run() {
		try {
			// the bookkeeping collections are guarded by the outer instance's monitor
			final String progress;
			synchronized (MirrorWebsite.this) {
				progress = " (" + (1 + done.size()) + "/" + jobs.size() + ")";
			}
			System.err.println("Looking at " + path + progress);
			for (String path2 : ensureUptodate(path)) try {
				mirror(path2);
			}
			catch (Throwable e) {
				System.err.println("" + e);
			}
		}
		catch (FileNotFoundException e) {
			// the link map is written by other jobs under its own lock
			final String source;
			synchronized (linkMap) {
				source = linkMap.get(path);
			}
			System.err.println("" + e + (source == null ? "" : " (linked from " + source + ")"));
			// the set is not thread-safe and is read by reportMissingLinks() under this monitor
			synchronized (MirrorWebsite.this) {
				missingLinks.add(path);
			}
		}
		catch (Throwable e) {
			System.err.println("Error while trying to mirror " + path);
			e.printStackTrace();
		}
		if (delay > 0) try {
			Thread.sleep(delay);
		} catch (InterruptedException e) {
			// keep the interruption visible instead of swallowing it
			Thread.currentThread().interrupt();
		}
		synchronized (MirrorWebsite.this) {
			done.add(path);
			// all scheduled jobs have finished: nothing can schedule new ones
			if (done.size() == jobs.size()) {
				executorService.shutdown();
				reportMissingLinks();
			}
		}
	}
}
private static void usage() {
System.err.println("Usage: MirrorWebsite [
© 2015 - 2025 Weber Informatics LLC | Privacy Policy