// org.archive.net.md5.Handler Maven / Gradle / Ivy
/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.net.md5;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLStreamHandler;
/**
* A protocol handler for an 'md5' URI scheme.
* Md5 URLs look like this: md5:deadbeefdeadbeefdeadbeefdeadbeef
* When this handler is invoked against an md5 URL, it passes the raw md5 to
* the configured script as an argument. The configured script then does the
* work to bring the item pointed to by the md5 local so we can open a Stream
* on the local copy. Local file is deleted when we finish. Do
* {@link org.archive.net.DownloadURLConnection#getFile()} to get name of
* temporary file.
*
* You need to define the system property
* {@code -Djava.protocol.handler.pkgs=org.archive.net} to add this handler
* to the java.net.URL set. Also define system properties
* {@code -Dorg.archive.net.md5.Md5URLConnection.path=PATH_TO_SCRIPT} to
* pass path of script to run as well as
* {@code -Dorg.archive.net.md5.Md5URLConnection.options=OPTIONS} for
* any options you'd like to include. The pointed-to PATH_TO_SCRIPT
* will be invoked as follows:
* {@code PATH_TO_SCRIPT OPTIONS MD5 LOCAL_TMP_FILE}.
* The LOCAL_TMP_FILE file is made in {@code java.io.tmpdir} using java
* tmp name code.
* @author stack
*/
public class Handler extends URLStreamHandler {
    /**
     * Creates the connection object for an md5 URL.
     *
     * @param u URL to open a connection on.
     * @return a new {@link Md5URLConnection} constructed from <code>u</code>.
     */
    @Override
    protected URLConnection openConnection(URL u) {
        return new Md5URLConnection(u);
    }

    /**
     * Main dumps the content behind the passed md5 URL to STDOUT.
     *
     * Prints a usage message and exits with status 1 unless exactly one
     * argument is supplied.
     *
     * @param args Single element: the md5 URL to fetch, e.g.
     * <code>md5:deadbeefdeadbeefdeadbeefdeadbeef</code>.
     * @throws IOException If the URL cannot be parsed or opened, or if
     * reading the stream fails.
     */
    public static void main(String[] args)
    throws IOException {
        if (args.length != 1) {
            System.out.println("Usage: java " +
                "-Djava.protocol.handler.pkgs=org.archive.net " +
                "org.archive.net.md5.Handler " +
                "md5:deadbeefdeadbeefdeadbeefdeadbeef");
            System.exit(1);
        }
        // NOTE: these calls unconditionally replace any values passed on
        // the command line with -D.
        System.setProperty("org.archive.net.md5.Md5URLConnection.path",
            "/tmp/manifest");
        System.setProperty("java.protocol.handler.pkgs", "org.archive.net");
        URL u = new URL(args[0]);
        URLConnection connect = u.openConnection();
        // Write download to stdout.
        final int bufferlength = 4096;
        byte [] buffer = new byte [bufferlength];
        InputStream is = connect.getInputStream();
        try {
            // Read exactly once per iteration. Reading in both the loop
            // initializer and the loop condition would silently drop the
            // first buffer-full of the download.
            int count;
            while ((count = is.read(buffer, 0, bufferlength)) != -1) {
                System.out.write(buffer, 0, count);
            }
            System.out.flush();
        } finally {
            is.close();
        }
    }
}