uk.ac.starlink.util.URLUtils Maven / Gradle / Ivy
Show all versions of stil Show documentation
package uk.ac.starlink.util;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.net.URLStreamHandlerFactory;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Provides convenience methods for resolving URLs.
* This class provides some static methods for turning strings into URLs.
* This tends to be a bit of a pain in java, since you have to watch
* out for MalformedURLExceptions all over and work out what the
* context is. The methods provided here assume that if a string
* looks like a URL it is one, if it doesn't it's a file name, and
* if it's not absolute or resolved against a given context
* it is relative to the current directory.
* From the point of view of a user providing text to an application,
* or an XML document providing an href, this is nearly always
* what is wanted. The strategy can lead to surprising situations
* in the case that wacky URL protocols are used; for instance if
* makeURL is called on the string "gftp://host/file" and
* no gftp handler is installed, it will be interpreted as a file-protocol
* URL referring to the (presumably non-existent) file "gftp://host/file".
* In this case the eventual upshot will presumably be a file-not-found
* type error rather than a MalformedURLException type error getting
* presented to the user. Users of this class should be of the opinion
* that this is not a particularly bad thing.
*
* The systemId strings used by {@link javax.xml.transform.Source}s
* have similar semantics to the strings which this class converts
* to URLs or contexts.
*
* This class assumes that the "file:" protocol is legal for URLs,
* and will throw AssertionErrors if this turns out not to be the case.
*
* @author Mark Taylor (Starlink)
* @author Norman Gray (Starlink)
*/
public class URLUtils {
private static final Logger logger_ =
Logger.getLogger( "uk.ac.starlink.util" );
/* Set up a URL representing the default context (the current directory). */
private static URL defaultContext;
static {
try {
defaultContext = new URL( "file:." );
}
catch ( MalformedURLException e ) {
throw protestFileProtocolIsLegal( e );
}
catch ( SecurityException e ) {
defaultContext = null;
}
}
private static final Pattern FILE_URL_REGEX =
Pattern.compile( "(file:)(/*)(.*)" );
/**
* Private constructor prevents instantiation.
*/
private URLUtils() {
}
/**
* Obtains a URL from a string. If the String has the form of a URL,
* it is turned directly into a URL. If it does not, it is treated as
* a filename, and turned into a file-protocol URL. In the latter
* case a relative or absolute filename may be used. If it is
* null or a blank string (or something else equally un-filename like?)
* then null is returned.
*
* @param location a string representing the location of a resource
* @return a URL representing the location of the resource
*/
public static URL makeURL( String location ) {
if ( location == null || location.trim().length() == 0 ) {
return null;
}
try {
return new URL( location );
}
catch ( MalformedURLException e ) {
try {
URI uri = new File( location ).toURI();
return uri.toURL();
//return new URL( uri.toString() );
}
catch ( MalformedURLException e2 ) {
throw protestFileProtocolIsLegal( e2 );
}
}
catch ( SecurityException e ) {
try {
return new URL( "file:" + location );
}
catch ( MalformedURLException e2 ) {
throw protestFileProtocolIsLegal( e2 );
}
}
}
/**
* Obtains a URL from a string in a given context.
* The string context is turned into a URL as per
* the {@link #makeURL(String)} method, unless it is null or
* the empty string, in which case it is treated as a reference
* to the current directory.
* The string location is then turned into a URL in
* the same way as using {@link #makeURL(String)}, except that
* if it represents a relative path it is resolved in the context
* of context, taking its protocol and/or relative position
* from it.
*
* @param context a string representing the context within which
* location is to be resolved
* @param location a string representing the location of a resource
* @return a URL representing the location of the resource
*/
public static URL makeURL( String context, String location ) {
URL contextURL;
if ( context == null || context.trim().length() == 0 ) {
contextURL = defaultContext;
}
else {
contextURL = makeURL( context );
}
try {
return new URL( contextURL, location );
}
catch ( MalformedURLException e ) {
try {
return new URL( contextURL, makeURL( location ).toString() );
}
catch ( MalformedURLException e2 ) {
// can this happen??
return makeURL( location );
}
}
}
/**
* Returns an Error which can be thrown when you can't make a URL even
* though you know you're using the "file:" protocol. Although this
* is permitted by the URL class, we consider ourselves to be on
* an irretrievably broken system if it happens.
*/
private static AssertionError
protestFileProtocolIsLegal( MalformedURLException e ) {
AssertionError ae =
new AssertionError( "Illegal \"file:\" protocol in URL??" );
ae.initCause( e );
return ae;
}
/**
* Turns a URL into a URI.
*
*
Since URIs are syntactically and semantically a superset of
* URLs, this conversion should not cause any errors. If,
* however, the input URL is malformed in rather extreme ways,
* then the URI construction will fail. These ways include (but
* are not necesssarily limited to) the features discussed in
* {@link java.net.URI#URI(String,String,String,String,String)},
* namely that a scheme is present, but with a relative path, or
* that it has a registry-based authority part.
*
*
Because of the way the class does the conversion, the method
* will itself resolve some malformations of URLs. You should not rely
* on this, however, firstly because the method might in principle
* change, but mostly because you should avoid creating such
* malformed URLs in the first place.
*
*
The most common source of malformed URLs is that of
* file
URLs which have inadequately escaped
* (windows) drive letters or spaces in the name: such URLs should
* be constructed using the {@link java.io.File#toURI} or {@link
* java.io.File#toURL} methods. Such URLs will be escaped by
* this method.
*
* @param url a URL to be converted. If this is null, then the
* method returns null
* @return the input URL as a URI, or null if the input was null
* @throws MalformedURLException if the URI cannot be constructed
* because the input URL turns out to be malformed
*/
public static URI urlToUri( URL url )
throws MalformedURLException {
/*
* Weaknesses: this method doesn't cope with URIs which have
* a scheme plus a relative path, or registry-based authorities.
* Ought it to? In the absence of specific use-cases,
* probably not, but we should note that this might be
* reasonable and be prepared to revisit it.
*/
if (url == null)
return null;
try {
return new URI(url.getProtocol(),
url.getAuthority(),
url.getPath(),
url.getQuery(),
url.getRef() // ie, fragment
);
} catch (java.net.URISyntaxException e) {
// The input URL was malformed, so indicate that
MalformedURLException newEx
= new MalformedURLException("URL " + url
+ " was malformed");
newEx.initCause(e);
throw newEx;
}
}
/**
* Constructs a legal URL for a given File.
* Unlike java, this gives you a URL which conforms to RFC1738 and
* looks like "file://localhost/abs-path
" rather than
* "file:abs-or-rel-path
".
*
* @param file file
* @return URL
* @see "RFC 1738"
*/
public static URL makeFileURL( File file ) {
try {
return fixURL( file.toURI().toURL() );
}
catch ( MalformedURLException e ) {
throw new AssertionError();
}
}
/**
* Fixes file: URLs which don't have enough slashes in them.
* Java generates invalid URLs of the form
* "file:abs-or-rel-path
"
* when it should generate "file://localhost/abs-path
".
*
* @param url input URL
* @return fixed URL
* @see "RFC 1738"
*/
public static URL fixURL( URL url ) {
Matcher matcher = FILE_URL_REGEX.matcher( url.toString() );
if ( matcher.matches() ) {
String scheme = matcher.group( 1 );
String slashes = matcher.group( 2 );
String path = matcher.group( 3 );
assert "file:".equals( scheme );
try {
switch ( slashes.length() ) {
case 0:
return fixURL( new File( path ).getAbsoluteFile()
.toURI().toURL() );
case 1:
return new URL( scheme + "//localhost"
+ slashes + path );
case 2:
return url;
default:
return url;
}
}
catch ( MalformedURLException e ) {
throw new AssertionError( e );
}
}
else {
return url;
}
}
/**
* Attempts to determine whether two URLs refer to the same resource.
* Not likely to be foolproof, but slightly smarter than using
* equals
.
*
* @param url1 first URL
* @param url2 second URL
* @return true if url1
and url2
appear to
* refer to the same resource
*/
public static boolean sameResource( URL url1, URL url2 ) {
if ( url1 == null && url2 == null ) {
return true;
}
else if ( url1 == null || url2 == null ) {
return false;
}
else if ( url1.equals( url2 ) ) {
return true;
}
else if ( url1.getProtocol().equals( "file" ) &&
url2.getProtocol().equals( "file" ) ) {
String[] strings = { url1.toString(), url2.toString() };
for ( int i = 0; i < 2; i++ ) {
strings[ i ] =
strings[ i ].replaceFirst( "^file:/*(localhost)?/*", "" );
}
return strings[ 0 ].equals( strings[ 1 ] );
}
else {
return false;
}
}
/**
* Locates the local file, if any, represented by a URL.
* If the URL string uses the "file:" protocol, and has no query or anchor
* parts, the filename will be extracted and the corresponding file
* returned. Otherwise, null is returned.
*
* @param url URL string
* @return local file referenced by url
, or null
*/
public static File urlToFile( String url ) {
URL u;
try {
u = new URL( url );
}
catch ( MalformedURLException e ) {
return null;
}
if ( u.getProtocol().equals( "file" ) && u.getRef() == null
&& u.getQuery() == null ) {
String path = u.getPath();
try {
/* Careful. URLDecoder does almost what we want, but not
* quite - it replaces "+" with " ", which is appropriate
* for application/x-www-form-urlencoded, but not for
* normal URL decoding (RFC1738). The %xy decoding is
* as required. So by converting + to its hex-escaped
* form before using URLDecoder we get what we need.
* An alternative would be to do the hex decoding by hand. */
path = URLDecoder.decode( path.replace( "+", "%2B" ), "utf-8" );
}
catch ( IllegalArgumentException e ) {
// probably a badly-formed URL - try with the undecoded form
}
catch ( UnsupportedEncodingException e ) {
// UTF-8 not accepted? fall back in this unlikely event.
assert false;
}
String filename = File.separatorChar == '/'
? path
: path.replace( '/', File.separatorChar );
return new File( filename );
}
else {
return null;
}
}
/**
* Compares two URLs. This does approximatly the same job as
* the URL.equals() method, but it avoids the possible network accesses
* associated with that implementation, and copes with null values.
*
* @param url1 first URL
* @param url2 second URL
* @return true iff both are the same, or both are null
*/
public static boolean urlEquals( URL url1, URL url2 ) {
if ( url1 == null ) {
return url2 == null;
}
else {
return url2 != null
&& url1.toString().equals( url2.toString() );
}
}
/**
* Takes a URLConnection and repeatedly follows 3xx redirects
* until a non-redirect status is achieved. Infinite loops are defended
* against. The Accept-Encoding header, if present, is propagated
* to redirect targets.
*
*
Note that the
* {@link java.net.HttpURLConnection#setInstanceFollowRedirects}
* method does something like this, but it refuses to redirect
* between different URL protocols, for security reasons
* (see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4620571).
* Considering similar arguments, this method will direct HTTP->HTTPS,
* but not vice versa.
*
* @param conn initial URL connection
* @param redirCodes list of HTTP codes for which redirects should
* be followed; if null all suitable 3xx redirections
* will be followed (301, 302, 303, 307)
* @return target URL connection
* (if no redirects, the same as hconn
)
*/
public static URLConnection followRedirects( URLConnection conn,
int[] redirCodes )
throws IOException {
if ( ! ( conn instanceof HttpURLConnection ) ) {
return conn;
}
HttpURLConnection hconn = (HttpURLConnection) conn;
Set urlSet = new HashSet();
urlSet.add( hconn.getURL() );
while ( isRedirect( hconn.getResponseCode(), redirCodes ) ) {
int hcode0 = hconn.getResponseCode();
URL url0 = hconn.getURL();
String loc = hconn.getHeaderField( "Location" );
if ( loc == null || loc.trim().length() == 0 ) {
throw new IOException( "No Location field for " + hcode0
+ " response" );
}
URL url1;
try {
url1 = new URL( loc );
}
catch ( MalformedURLException e ) {
throw (IOException)
new IOException( "Bad Location field for " + hcode0
+ " response from " + url0 )
.initCause( e );
}
if ( ! urlSet.add( url1 ) ) {
throw new IOException( "Recursive " + hcode0 + " redirect at "
+ url1 );
}
String proto0 = url0.getProtocol().toLowerCase();
String proto1 = url1.getProtocol().toLowerCase();
if ( "https".equals( proto0 ) && ! "https".equals( proto1 ) ) {
throw new IOException( "Refuse to redirect " + proto0
+ " URL to " + proto1
+ " (" + url0 + " -> " + url1 + ")" );
}
logger_.info( "HTTP " + hcode0 + " redirect to " + url1 );
URLConnection conn1 = url1.openConnection();
if ( ! ( conn1 instanceof HttpURLConnection ) ) {
return conn1;
}
/* Propagate any Accept-Encoding header, which may have been
* added by hand to the initial connection, to the redirect
* target, otherwise it will get lost. */
String acceptEncoding =
hconn.getRequestProperty( ContentCoding.ACCEPT_ENCODING );
hconn = (HttpURLConnection) conn1;
if ( acceptEncoding != null ) {
hconn.setRequestProperty( ContentCoding.ACCEPT_ENCODING,
acceptEncoding );
}
}
return hconn;
}
/**
* Indicates whether an HTTP response code should be interpreted
* as a request to redirect.
*
* @param hcode code to test
* @param redirCodes list of HTTP codes for which redirects should
* be followed; if null all suitable 3xx redirections
* will be followed (301, 302, 303, 307)
* @return true iff hcode represents a redirect
*/
private static boolean isRedirect( int hcode, int[] redirCodes ) {
int[] rcodes = redirCodes == null ? new int[] { 301, 302, 303, 307 }
: redirCodes;
for ( int i = 0; i < rcodes.length; i++ ) {
if ( hcode == rcodes[ i ] ) {
return true;
}
}
return false;
}
/**
* Attempts to install additional URL protocol handlers suitable
* for astronomy applications. Currently installs handlers which
* can supply MySpace connections using either "ivo:
" or
* "myspace:
" protocols.
*/
public static void installCustomHandlers() {
/* See if the system property which customises URL protocol handling
* has been set. If so, don't attempt to mess about further with
* the configuration. */
String pkgProp = "java.protocol.handler.pkgs";
boolean hasPkgProp;
try {
hasPkgProp = System.getProperty( pkgProp, "" ).length() > 0;
}
catch ( SecurityException e ) {
hasPkgProp = false;
}
if ( hasPkgProp ) {
logger_.config( pkgProp + " is set - don't further configure "
+ "URL protocol handlers" );
return;
}
/* Set up a handler factory which deals with myspace. This is
* equivalent to setting the java.protocol.handler.pkgs system
* property to "uk.ac.starlink.astrogrid.protocols", but the
* latter can only be done before starting up the JVM. */
Map handlerMap = new HashMap();
String[] protos = new String[] { "ivo", "myspace", };
for ( String proto : protos ) {
handlerMap.put( proto,
"uk.ac.starlink.astrogrid.protocols."
+ proto + ".Handler" );
}
URLStreamHandlerFactory fact =
new CustomURLStreamHandlerFactory( handlerMap );
/* Attempt to install the custom handler. */
try {
URL.setURLStreamHandlerFactory( fact );
logger_.config( "Set up URL custom protocol handlers " +
Arrays.asList( protos ) );
}
catch ( Throwable e ) {
logger_.warning( "Can't set custom URL protocol handlers: " + e );
}
}
}