// HTTPClient.URI Maven / Gradle / Ivy
// Show all versions of grinder-httpclient Show documentation
/*
* @(#)URI.java 0.3-3 06/05/2001
*
* This file is part of the HTTPClient package
* Copyright (C) 1996-2001 Ronald Tschalär
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307, USA
*
* For questions, suggestions, bug-reports, enhancement-requests etc.
* I may be contacted at:
*
* ronald@innovation.ch
*
* The HTTPClient's home page is located at:
*
* http://www.innovation.ch/java/HTTPClient/
*
*/
package HTTPClient;
import java.net.URL;
import java.net.MalformedURLException;
import java.util.BitSet;
import java.util.Hashtable;
/**
* This class represents a generic URI, as defined in RFC-2396.
* This is similar to java.net.URL, with the following enhancements:
*
* <ul>
* <li>it doesn't require a URLStreamHandler to exist for the scheme; this
* allows this class to be used to hold any URI, construct absolute
* URIs from relative ones, etc.
* <li>it handles escapes correctly
* <li>equals() works correctly
* <li>relative URIs are correctly constructed
* <li>it has methods for accessing various fields such as userinfo,
* fragment, params, etc.
* <li>it handles less common forms of resources such as the "*" used in
* http URLs.
* </ul>
*
* The elements are always stored in escaped form.
*
*
* <p>While RFC-2396 distinguishes between just two forms of URIs, those that
* follow the generic syntax and those that don't, this class knows about a
* third form, named semi-generic, used by quite a few popular schemes.
* Semi-generic syntax treats the path part as opaque, i.e. has the form
* <scheme>://<authority>/<opaque> . Relative URI's of this
* type are only resolved as far as absolute paths - relative paths do not
* exist.
*
*
* <p>Ideally, java.net.URL should subclass URI.
*
* @see rfc-2396
* @version 0.3-3 06/05/2001
* @author Ronald Tschalär
* @since V0.3-1
*/
public class URI
{
/**
* If true, then the parser will resolve certain URI's in backwards
* compatible (but technically incorrect) manner. Example:
*
* <pre>
* base = http://a/b/c/d;p?q
* rel = http:g
* result = http:g (correct)
* result = http://a/b/c/g (backwards compatible)
* </pre>
*
* See rfc-2396, section 5.2, step 3, second paragraph.
*/
public static final boolean ENABLE_BACKWARDS_COMPATIBILITY = true;
/** maps a lower-case scheme name to its default port (an Integer); filled in the static initializer */
protected static final Hashtable defaultPorts = new Hashtable();
/** set of scheme names (keys; values are Boolean.TRUE) parsed with the generic syntax */
protected static final Hashtable usesGenericSyntax = new Hashtable();
/** set of scheme names (keys; values are Boolean.TRUE) parsed with the semi-generic syntax */
protected static final Hashtable usesSemiGenericSyntax = new Hashtable();
/* various character classes as defined in the draft; each BitSet is indexed
* by character code and is built in the static initializer
*/
protected static final BitSet alphanumChar;
protected static final BitSet markChar;
protected static final BitSet reservedChar;
protected static final BitSet unreservedChar;
protected static final BitSet uricChar;
protected static final BitSet pcharChar;
protected static final BitSet userinfoChar;
protected static final BitSet schemeChar;
protected static final BitSet hostChar;
protected static final BitSet opaqueChar;
protected static final BitSet reg_nameChar;
/* These are not directly in the spec, but used for escaping and
* unescaping parts
*/
/** list of characters which must not be unescaped when unescaping a scheme */
public static final BitSet resvdSchemeChar;
/** list of characters which must not be unescaped when unescaping a userinfo */
public static final BitSet resvdUIChar;
/** list of characters which must not be unescaped when unescaping a host */
public static final BitSet resvdHostChar;
/** list of characters which must not be unescaped when unescaping a path */
public static final BitSet resvdPathChar;
/** list of characters which must not be unescaped when unescaping a query string */
public static final BitSet resvdQueryChar;
/** list of characters which must not be escaped when escaping a path */
public static final BitSet escpdPathChar;
/** list of characters which must not be escaped when escaping a query string */
public static final BitSet escpdQueryChar;
/** list of characters which must not be escaped when escaping a fragment identifier */
public static final BitSet escpdFragChar;
// Populates the scheme tables and builds all character-class BitSets.
// The BitSets are built in dependency order: a set that is or()'ed into
// another one is always completed first.
static
{
// default port per scheme; looked up by defaultPort()
defaultPorts.put("http", new Integer(80));
defaultPorts.put("shttp", new Integer(80));
defaultPorts.put("http-ng", new Integer(80));
defaultPorts.put("coffee", new Integer(80));
defaultPorts.put("https", new Integer(443));
defaultPorts.put("ftp", new Integer(21));
defaultPorts.put("telnet", new Integer(23));
defaultPorts.put("nntp", new Integer(119));
defaultPorts.put("news", new Integer(119));
defaultPorts.put("snews", new Integer(563));
defaultPorts.put("hnews", new Integer(80));
defaultPorts.put("smtp", new Integer(25));
defaultPorts.put("gopher", new Integer(70));
defaultPorts.put("wais", new Integer(210));
defaultPorts.put("whois", new Integer(43));
defaultPorts.put("whois++", new Integer(63));
defaultPorts.put("rwhois", new Integer(4321));
defaultPorts.put("imap", new Integer(143));
defaultPorts.put("pop", new Integer(110));
defaultPorts.put("prospero", new Integer(1525));
defaultPorts.put("irc", new Integer(194));
defaultPorts.put("ldap", new Integer(389));
defaultPorts.put("nfs", new Integer(2049));
defaultPorts.put("z39.50r", new Integer(210));
defaultPorts.put("z39.50s", new Integer(210));
defaultPorts.put("vemmi", new Integer(575));
defaultPorts.put("videotex", new Integer(516));
defaultPorts.put("cmp", new Integer(829));
// schemes parsed with the full generic syntax; queried by usesGenericSyntax(String)
usesGenericSyntax.put("http", Boolean.TRUE);
usesGenericSyntax.put("https", Boolean.TRUE);
usesGenericSyntax.put("shttp", Boolean.TRUE);
usesGenericSyntax.put("coffee", Boolean.TRUE);
usesGenericSyntax.put("ftp", Boolean.TRUE);
usesGenericSyntax.put("file", Boolean.TRUE);
usesGenericSyntax.put("nntp", Boolean.TRUE);
usesGenericSyntax.put("news", Boolean.TRUE);
usesGenericSyntax.put("snews", Boolean.TRUE);
usesGenericSyntax.put("hnews", Boolean.TRUE);
usesGenericSyntax.put("imap", Boolean.TRUE);
usesGenericSyntax.put("wais", Boolean.TRUE);
usesGenericSyntax.put("nfs", Boolean.TRUE);
usesGenericSyntax.put("sip", Boolean.TRUE);
usesGenericSyntax.put("sips", Boolean.TRUE);
usesGenericSyntax.put("sipt", Boolean.TRUE);
usesGenericSyntax.put("sipu", Boolean.TRUE);
/* Note: schemes which definitely don't use the generic-URI syntax
* and must therefore never appear in the above list:
* "urn", "mailto", "sdp", "service", "tv", "gsm-sms", "tel", "fax",
* "modem", "eid", "cid", "mid", "data", "ldap"
*/
// schemes parsed with the semi-generic syntax (authority + opaque path);
// queried by usesSemiGenericSyntax(String)
usesSemiGenericSyntax.put("ldap", Boolean.TRUE);
usesSemiGenericSyntax.put("irc", Boolean.TRUE);
usesSemiGenericSyntax.put("gopher", Boolean.TRUE);
usesSemiGenericSyntax.put("videotex", Boolean.TRUE);
usesSemiGenericSyntax.put("rwhois", Boolean.TRUE);
usesSemiGenericSyntax.put("whois++", Boolean.TRUE);
usesSemiGenericSyntax.put("smtp", Boolean.TRUE);
usesSemiGenericSyntax.put("telnet", Boolean.TRUE);
usesSemiGenericSyntax.put("prospero", Boolean.TRUE);
usesSemiGenericSyntax.put("pop", Boolean.TRUE);
usesSemiGenericSyntax.put("vemmi", Boolean.TRUE);
usesSemiGenericSyntax.put("z39.50r", Boolean.TRUE);
usesSemiGenericSyntax.put("z39.50s", Boolean.TRUE);
usesSemiGenericSyntax.put("stream", Boolean.TRUE);
usesSemiGenericSyntax.put("cmp", Boolean.TRUE);
// alphanum: ASCII digits and letters
alphanumChar = new BitSet(128);
for (int ch='0'; ch<='9'; ch++) alphanumChar.set(ch);
for (int ch='A'; ch<='Z'; ch++) alphanumChar.set(ch);
for (int ch='a'; ch<='z'; ch++) alphanumChar.set(ch);
// mark: punctuation that is allowed unescaped
markChar = new BitSet(128);
markChar.set('-');
markChar.set('_');
markChar.set('.');
markChar.set('!');
markChar.set('~');
markChar.set('*');
markChar.set('\'');
markChar.set('(');
markChar.set(')');
// reserved: characters acting as delimiters
reservedChar = new BitSet(128);
reservedChar.set(';');
reservedChar.set('/');
reservedChar.set('?');
reservedChar.set(':');
reservedChar.set('@');
reservedChar.set('&');
reservedChar.set('=');
reservedChar.set('+');
reservedChar.set('$');
reservedChar.set(',');
// unreserved = alphanum + mark
unreservedChar = new BitSet(128);
unreservedChar.or(alphanumChar);
unreservedChar.or(markChar);
// uric = unreserved + reserved + '%' (so existing escapes are left intact)
uricChar = new BitSet(128);
uricChar.or(unreservedChar);
uricChar.or(reservedChar);
uricChar.set('%');
// pchar = unreserved + '%' + a subset of the reserved characters
pcharChar = new BitSet(128);
pcharChar.or(unreservedChar);
pcharChar.set('%');
pcharChar.set(':');
pcharChar.set('@');
pcharChar.set('&');
pcharChar.set('=');
pcharChar.set('+');
pcharChar.set('$');
pcharChar.set(',');
// userinfo = unreserved + '%' + a subset of the reserved characters ('@' excluded)
userinfoChar = new BitSet(128);
userinfoChar.or(unreservedChar);
userinfoChar.set('%');
userinfoChar.set(';');
userinfoChar.set(':');
userinfoChar.set('&');
userinfoChar.set('=');
userinfoChar.set('+');
userinfoChar.set('$');
userinfoChar.set(',');
// this actually shouldn't contain uppercase letters...
schemeChar = new BitSet(128);
schemeChar.or(alphanumChar);
schemeChar.set('+');
schemeChar.set('-');
schemeChar.set('.');
// opaque parts may contain any uric character
opaqueChar = new BitSet(128);
opaqueChar.or(uricChar);
// host = alphanum + '-' + '.'
hostChar = new BitSet(128);
hostChar.or(alphanumChar);
hostChar.set('-');
hostChar.set('.');
// reg_name = unreserved + a subset of the reserved characters
reg_nameChar = new BitSet(128);
reg_nameChar.or(unreservedChar);
reg_nameChar.set('$');
reg_nameChar.set(',');
reg_nameChar.set(';');
reg_nameChar.set(':');
reg_nameChar.set('@');
reg_nameChar.set('&');
reg_nameChar.set('=');
reg_nameChar.set('+');
// resvd*Char: per-component delimiters that must stay escaped when the
// component is unescaped
resvdSchemeChar = new BitSet(128);
resvdSchemeChar.set(':');
resvdUIChar = new BitSet(128);
resvdUIChar.set('@');
resvdHostChar = new BitSet(128);
resvdHostChar.set(':');
resvdHostChar.set('/');
resvdHostChar.set('?');
resvdHostChar.set('#');
resvdPathChar = new BitSet(128);
resvdPathChar.set('/');
resvdPathChar.set(';');
resvdPathChar.set('?');
resvdPathChar.set('#');
resvdQueryChar = new BitSet(128);
resvdQueryChar.set('#');
// escpd*Char: characters left untouched when the component is escaped
escpdPathChar = new BitSet(128);
escpdPathChar.or(pcharChar);
escpdPathChar.set('%');
escpdPathChar.set('/');
escpdPathChar.set(';');
escpdQueryChar = new BitSet(128);
escpdQueryChar.or(uricChar);
// '#' is not a member of uricChar as built above, so this clear() has no effect
escpdQueryChar.clear('#');
escpdFragChar = new BitSet(128);
escpdFragChar.or(uricChar);
}
/* our uri in pieces */
// the three possible values of 'type'
protected static final int OPAQUE = 0;
protected static final int SEMI_GENERIC = 1;
protected static final int GENERIC = 2;
// one of OPAQUE, SEMI_GENERIC or GENERIC
protected int type;
protected String scheme;
// the scheme specific part of an opaque URI; null for the other two types
protected String opaque;
protected String userinfo;
protected String host;
// -1 when no port is stored (none was given, or it equals the scheme's default)
protected int port = -1;
protected String path;
protected String query;
protected String fragment;
/* cache the java.net.URL */
// filled lazily by toURL()
protected URL url = null;
// Constructors
/**
* Constructs a URI from the given string representation. The string
* must be an absolute URI. Delegates to {@link #URI(URI, String)} with
* a null base.
*
* @param uri a String containing an absolute URI
* @exception ParseException if no scheme can be found or a specified
* port cannot be parsed as a number
*/
public URI(String uri) throws ParseException
{
this((URI) null, uri);
}
/**
 * Constructs a URI from the given string representation, relative to
 * the given base URI. Leading and trailing whitespace in
 * <var>rel_uri</var> is ignored for every kind of URI (opaque,
 * semi-generic and generic).
 *
 * @param base    the base URI, relative to which <var>rel_uri</var>
 *                is to be parsed; may be null
 * @param rel_uri a String containing a relative or absolute URI
 * @exception ParseException if <var>base</var> is null and
 *                           <var>rel_uri</var> is not an absolute URI, or
 *                           if <var>base</var> is not null and the scheme
 *                           is not known to use the generic syntax, or
 *                           if a given port cannot be parsed as a number
 */
public URI(URI base, String rel_uri) throws ParseException
{
    /* Parsing is done according to the following RE:
     *
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     *   12            3  4          5       6  7        8 9
     *
     * 2: scheme
     * 4: authority
     * 5: path
     * 7: query
     * 9: fragment
     */

    char[] uri = rel_uri.toCharArray();
    int pos = 0, idx, len = uri.length;

    // trim(): [pos, len) is the part of rel_uri that is actually parsed.
    // The second loop stops at pos so that len can never drop below pos
    // (which would make substring(pos, len) fail for blank input).
    while (pos < len && Character.isWhitespace(uri[pos]))   pos++;
    while (len > pos && Character.isWhitespace(uri[len-1])) len--;

    // strip the special "url" or "uri" scheme
    if (pos < len-3 && uri[pos+3] == ':' &&
        (uri[pos+0] == 'u' || uri[pos+0] == 'U') &&
        (uri[pos+1] == 'r' || uri[pos+1] == 'R') &&
        (uri[pos+2] == 'i' || uri[pos+2] == 'I' ||
         uri[pos+2] == 'l' || uri[pos+2] == 'L'))
        pos += 4;

    // get scheme: (([^:/?#]+):)?
    idx = pos;
    while (idx < len && uri[idx] != ':' && uri[idx] != '/' &&
           uri[idx] != '?' && uri[idx] != '#')
        idx++;
    if (idx < len && uri[idx] == ':')
    {
        scheme = rel_uri.substring(pos, idx).trim().toLowerCase();
        pos = idx + 1;
    }

    // check and resolve scheme
    String final_scheme = scheme;
    if (scheme == null)
    {
        if (base == null)
            throw new ParseException("No scheme found");
        final_scheme = base.scheme;
    }

    // check for generic vs. opaque
    type = usesGenericSyntax(final_scheme) ? GENERIC :
           usesSemiGenericSyntax(final_scheme) ? SEMI_GENERIC : OPAQUE;
    if (type == OPAQUE)
    {
        if (base != null && scheme == null)
            throw new ParseException("Can't resolve relative URI for " +
                                     "scheme " + final_scheme);

        // stop at len, not at the end of rel_uri, so that trailing
        // whitespace does not end up escaped inside the opaque part
        opaque = escape(rel_uri.substring(pos, len), opaqueChar, true);
        if (opaque.length() > 0 && opaque.charAt(0) == '/')
            opaque = "%2F" + opaque.substring(1);
        return;
    }

    // get authority: (//([^/?#]*))?
    if (pos+1 < len && uri[pos] == '/' && uri[pos+1] == '/')
    {
        pos += 2;
        idx = pos;
        while (idx < len && uri[idx] != '/' && uri[idx] != '?' &&
               uri[idx] != '#')
            idx++;

        parse_authority(rel_uri.substring(pos, idx), final_scheme);
        pos = idx;
    }

    // handle semi-generic and generic uri's
    if (type == SEMI_GENERIC)
    {
        // everything after the authority is one opaque path; again only
        // up to len so trailing whitespace is dropped
        path = escape(rel_uri.substring(pos, len), uricChar, true);
        if (path.length() > 0 && path.charAt(0) != '/')
            path = '/' + path;
    }
    else
    {
        // get path: ([^?#]*)
        idx = pos;
        while (idx < len && uri[idx] != '?' && uri[idx] != '#')
            idx++;
        path = escape(rel_uri.substring(pos, idx), escpdPathChar, true);
        pos = idx;

        // get query: (\?([^#]*))?
        if (pos < len && uri[pos] == '?')
        {
            pos += 1;
            idx = pos;
            while (idx < len && uri[idx] != '#')
                idx++;
            this.query = escape(rel_uri.substring(pos, idx), escpdQueryChar, true);
            pos = idx;
        }

        // get fragment: (#(.*))?
        if (pos < len && uri[pos] == '#')
            this.fragment = escape(rel_uri.substring(pos+1, len), escpdFragChar, true);
    }

    // now resolve the parts relative to the base
    if (base != null)
    {
        if (scheme != null &&                       // resolve scheme
            !(scheme.equals(base.scheme) && ENABLE_BACKWARDS_COMPATIBILITY))
            return;
        scheme = base.scheme;

        if (host != null)                           // resolve authority
            return;
        userinfo = base.userinfo;
        host     = base.host;
        port     = base.port;

        if (type == SEMI_GENERIC)                   // can't resolve relative paths
            return;

        if (path.length() == 0 && query == null)    // current doc
        {
            path  = base.path;
            query = base.query;
            return;
        }

        if (path.length() == 0 || path.charAt(0) != '/')   // relative path
        {
            // replace the last segment of the base path by the relative path
            idx = (base.path != null) ? base.path.lastIndexOf('/') : -1;
            if (idx < 0)
                path = '/' + path;
            else
                path = base.path.substring(0, idx+1) + path;

            path = canonicalizePath(path);
        }
    }
}
/**
* Remove all "/../" and "/./" from path, where possible. Leading "/../"'s
* are not removed.
*
* @param path the path to canonicalize
* @return the canonicalized path
*/
public static String canonicalizePath(String path)
{
int idx, len = path.length();
if (!((idx = path.indexOf("/.")) != -1 &&
(idx == len-2 || path.charAt(idx+2) == '/' ||
(path.charAt(idx+2) == '.' &&
(idx == len-3 || path.charAt(idx+3) == '/')) )))
return path;
char[] p = new char[path.length()]; // clean path
path.getChars(0, p.length, p, 0);
int beg = 0;
for (idx=1; idx beg && p[end] != '/') end--;
if (p[end] != '/') continue;
if (idx == len-2) end++;
idx += 2;
}
else
continue;
System.arraycopy(p, idx, p, end, len-idx);
len -= idx - end;
idx = end;
}
}
return new String(p, 0, len);
}
/**
 * Splits the authority component into userinfo, host and port and stores
 * them in the corresponding fields.
 *
 * @param authority the authority part of the URI (the text following "//")
 * @param scheme    the scheme, used to recognise the scheme's default port
 * @exception ParseException if an IPv6 literal is not terminated by ']'
 *                           or if the port is not a non-negative number
 */
private void parse_authority(String authority, String scheme)
        throws ParseException
{
    /* The authority is further parsed according to:
     *
     *  ^(([^@]*)@?)(\[[^]]*\]|[^:]*)?(:(.*))?
     *   12         3                 4 5
     *
     * 2: userinfo
     * 3: host
     * 5: port
     */
    final int end = authority.length();
    int cur = 0;

    // userinfo: everything in front of the first '@', if there is one
    final int at = authority.indexOf('@');
    if (at >= 0)
    {
        this.userinfo = escape(authority.substring(0, at), userinfoChar, true);
        cur = at + 1;
    }

    // host: either a bracketed IPv6 literal or everything up to the first ':'
    int hostEnd;
    if (cur < end && authority.charAt(cur) == '[')
    {
        final int close = authority.indexOf(']', cur);
        if (close < 0)
            throw new ParseException("No closing ']' found for opening '['"+
                                     " at position " + cur +
                                     " in authority `" + authority + "'");
        this.host = authority.substring(cur + 1, close);   // stored without the brackets
        hostEnd = close + 1;
    }
    else
    {
        final int colon = authority.indexOf(':', cur);
        hostEnd = (colon < 0) ? end : colon;
        this.host = escape(authority.substring(cur, hostEnd), uricChar, true);
    }

    // port: only if at least one character follows the ':'
    if (hostEnd < end - 1 && authority.charAt(hostEnd) == ':')
    {
        final String portText = authority.substring(hostEnd + 1, end);
        int number;
        try
        {
            number = Integer.parseInt(unescape(portText, null));
            if (number < 0)
                throw new NumberFormatException();
        }
        catch (NumberFormatException e)
        {
            throw new ParseException(portText + " is an invalid port number");
        }

        // the scheme's default port is stored as -1
        this.port = (number == defaultPort(scheme)) ? -1 : number;
    }
}
/**
* Construct a URI from the given URL by parsing the string returned by
* <code>url.toExternalForm()</code>.
*
* @param url the URL
* @exception ParseException if <code>url.toExternalForm()</code> generates
* an invalid string representation
*/
public URI(URL url) throws ParseException
{
this((URI) null, url.toExternalForm());
}
/**
* Constructs a URI from the given parts, using the default port for
* this scheme (if known). The parts must be in unescaped form.
* Delegates to the seven-argument constructor with a port of -1 and no
* userinfo, query or fragment.
*
* @param scheme the scheme (sometimes known as protocol)
* @param host the host
* @param path the path part
* @exception ParseException if scheme is null
*/
public URI(String scheme, String host, String path) throws ParseException
{
this(scheme, null, host, -1, path, null, null);
}
/**
* Constructs a URI from the given parts. The parts must be in unescaped
* form. Delegates to the seven-argument constructor with no userinfo,
* query or fragment.
*
* @param scheme the scheme (sometimes known as protocol)
* @param host the host
* @param port the port
* @param path the path part
* @exception ParseException if scheme is null
*/
public URI(String scheme, String host, int port, String path)
throws ParseException
{
this(scheme, null, host, port, path, null, null);
}
/**
* Constructs a URI from the given parts. Any part except for the
* the scheme may be null. The parts must be in unescaped form.
*
* @param scheme the scheme (sometimes known as protocol)
* @param userinfo the userinfo
* @param host the host
* @param port the port
* @param path the path part
* @param query the query string
* @param fragment the fragment identifier
* @exception ParseException if scheme is null
*/
public URI(String scheme, String userinfo, String host, int port,
String path, String query, String fragment)
throws ParseException
{
if (scheme == null)
throw new ParseException("missing scheme");
this.scheme = escape(scheme.trim().toLowerCase(), schemeChar, true);
if (userinfo != null)
this.userinfo = escape(userinfo.trim(), userinfoChar, true);
if (host != null)
{
host = host.trim();
this.host = isIPV6Addr(host) ? host : escape(host, hostChar, true);
}
if (port != defaultPort(scheme))
this.port = port;
if (path != null)
this.path = escape(path.trim(), escpdPathChar, true); // ???
if (query != null)
this.query = escape(query.trim(), escpdQueryChar, true);
if (fragment != null)
this.fragment = escape(fragment.trim(), escpdFragChar, true);
type = usesGenericSyntax(scheme) ? GENERIC : SEMI_GENERIC;
}
private static final boolean isIPV6Addr(String host)
{
if (host.indexOf(':') < 0)
return false;
for (int idx=0; idx '9') && ch != ':')
return false;
}
return true;
}
/**
* Constructs an opaque URI from the given parts.
*
* @param scheme the scheme (sometimes known as protocol)
* @param opaque the opaque part
* @exception ParseException if scheme is null
*/
public URI(String scheme, String opaque)
throws ParseException
{
if (scheme == null)
throw new ParseException("missing scheme");
this.scheme = escape(scheme.trim().toLowerCase(), schemeChar, true);
this.opaque = escape(opaque, opaqueChar, true);
type = OPAQUE;
}
// Class Methods
/**
 * Looks the scheme up (trimmed, lower-cased) in the table of schemes
 * known to follow the generic-URI syntax.
 *
 * @param scheme the scheme to look up
 * @return true if the scheme should be parsed according to the
 *         generic-URI syntax
 */
public static boolean usesGenericSyntax(String scheme)
{
    final String key = scheme.trim().toLowerCase();
    return usesGenericSyntax.containsKey(key);
}
/**
 * Looks the scheme up (trimmed, lower-cased) in the table of schemes
 * known to follow the semi-generic-URI syntax.
 *
 * @param scheme the scheme to look up
 * @return true if the scheme should be parsed according to a
 *         semi-generic-URI syntax
 *         &lt;scheme&gt;://&lt;hostport&gt;/&lt;opaque&gt;
 */
public static boolean usesSemiGenericSyntax(String scheme)
{
    final String key = scheme.trim().toLowerCase();
    return usesSemiGenericSyntax.containsKey(key);
}
/**
 * Looks up the default port of a protocol. The name is trimmed and
 * lower-cased before the lookup.
 *
 * @param protocol the protocol
 * @return the port number, or 0 if unknown
 */
public final static int defaultPort(String protocol)
{
    final Object known = defaultPorts.get(protocol.trim().toLowerCase());
    if (known == null)
    {
        return 0;
    }
    return ((Integer) known).intValue();
}
// Instance Methods
/**
* Returns the stored scheme.
*
* @return the scheme (often also referred to as protocol)
*/
public String getScheme()
{
return scheme;
}
/**
* Returns the stored opaque (scheme specific) part.
*
* @return the opaque part, or null if this URI is generic
*/
public String getOpaque()
{
return opaque;
}
/**
* Returns the stored host.
*
* @return the host, or null if none is set
*/
public String getHost()
{
return host;
}
/**
* Returns the stored port. The field starts out as -1 and both the parser
* and the constructors leave it at -1 when the given port equals the
* scheme's default port.
*
* @return the port, or -1 if no port was given or it's the default port
*/
public int getPort()
{
return port;
}
/**
* Returns the stored userinfo.
*
* @return the user info, or null if none is set
*/
public String getUserinfo()
{
return userinfo;
}
/**
* Returns the stored path.
*
* @return the path, or null if none is set
*/
public String getPath()
{
return path;
}
/**
* Returns the stored query string (without the leading '?').
*
* @return the query string, or null if none is set
*/
public String getQueryString()
{
return query;
}
/**
 * Joins path and query with a '?'. Without a query the path is returned
 * as is (possibly null); without a path the result is just "?" followed
 * by the query.
 *
 * @return the path and query
 */
public String getPathAndQuery()
{
    if (query == null)
    {
        return path;
    }

    final String prefix = (path == null) ? "" : path;
    return prefix + "?" + query;
}
/**
* Returns the stored fragment identifier (without the leading '#').
*
* @return the fragment, or null if none is set
*/
public String getFragment()
{
return fragment;
}
/**
* Does the scheme specific part of this URI use the generic-URI syntax?
*
* <p>In general URIs are split into two categories: opaque-URI and
* generic-URI. The generic-URI syntax is the syntax most are familiar
* with from URLs such as ftp- and http-URLs, which is roughly:
*
* <pre>
* generic-URI = scheme ":" [ "//" server ] [ "/" ] [ path_segments ] [ "?" query ]
* </pre>
*
* (see RFC-2396 for exact syntax). Only URLs using the generic-URI syntax
* can be used to create and resolve relative URIs.
*
* <p>Whether a given scheme is parsed according to the generic-URI
* syntax or whether it is treated as opaque is determined by an internal
* table of URI schemes.
*
* @return true if this URI's type is GENERIC
* @see rfc-2396
*/
public boolean isGenericURI()
{
return (type == GENERIC);
}
/**
* Does the scheme specific part of this URI use the semi-generic-URI syntax?
*
* <p>Many schemes which don't follow the full generic syntax actually
* follow a reduced form where the path part is treated as opaque. This
* is used for example by ldap, smtp, pop, etc, and is roughly
*
* <pre>
* generic-URI = scheme ":" [ "//" server ] [ "/" [ opaque_path ] ]
* </pre>
*
* I.e. parsing is identical to the generic-syntax, except that the path
* part is not further parsed. URLs using the semi-generic-URI syntax can
* be used to create and resolve relative URIs with the restriction that
* all paths are treated as absolute.
*
* <p>Whether a given scheme is parsed according to the semi-generic-URI
* syntax is determined by an internal table of URI schemes.
*
* @return true if this URI's type is SEMI_GENERIC
* @see #isGenericURI()
*/
public boolean isSemiGenericURI()
{
return (type == SEMI_GENERIC);
}
/**
 * Converts this URI into a java.net.URL. The URL is created on the first
 * successful call and the same instance is returned afterwards.
 *
 * @return the URL
 * @exception MalformedURLException if no handler is available for the
 *                                  scheme
 */
public URL toURL() throws MalformedURLException
{
    if (url == null)
    {
        if (opaque != null)
        {
            url = new URL(scheme + ":" + opaque);
        }
        else
        {
            // the userinfo, if any, is passed as part of the host argument
            String hostinfo = host;
            if (userinfo != null)
            {
                hostinfo = (host != null) ? userinfo + "@" + host
                                          : userinfo + "@";
            }

            StringBuffer file = new StringBuffer(100);
            assemblePath(file, true, true, false);
            url = new URL(scheme, hostinfo, port, file.toString());
        }
    }

    return url;
}
/**
 * Appends path, query and (optionally) fragment to the given buffer.
 *
 * @param buf         the buffer to append to
 * @param printEmpty  if true, a missing or empty path is written as "/"
 * @param incFragment if true, the fragment (if any) is appended too
 * @param unescape    if true, each part is unescaped before being appended
 */
private final void assemblePath(StringBuffer buf, boolean printEmpty,
                                boolean incFragment, boolean unescape)
{
    final boolean noPath = (path == null || path.length() == 0);
    if (noPath && printEmpty)
    {
        buf.append('/');
    }

    if (path != null)
    {
        if (unescape)
            buf.append(unescapeNoPE(path, resvdPathChar));
        else
            buf.append(path);
    }

    if (query != null)
    {
        buf.append('?');
        if (unescape)
            buf.append(unescapeNoPE(query, resvdQueryChar));
        else
            buf.append(query);
    }

    if (incFragment && fragment != null)
    {
        buf.append('#');
        if (unescape)
            buf.append(unescapeNoPE(fragment, null));
        else
            buf.append(fragment);
    }
}
/**
 * Assembles the string form of this URI from its parts.
 *
 * @param unescape if true, each part is unescaped before it is appended
 *                 (a host containing ':' is always written as is, in
 *                 brackets)
 * @return the assembled URI
 */
private final String stringify(boolean unescape)
{
    final StringBuffer out = new StringBuffer(100);

    if (scheme != null)
    {
        out.append(unescape ? unescapeNoPE(scheme, resvdSchemeChar) : scheme)
           .append(':');
    }

    if (opaque != null)         // it's an opaque-uri
    {
        out.append(unescape ? unescapeNoPE(opaque, null) : opaque);
        return out.toString();
    }

    final boolean hasAuthority =
        (userinfo != null || host != null || port != -1);
    if (hasAuthority)
    {
        out.append("//");
    }

    if (userinfo != null)
    {
        out.append(unescape ? unescapeNoPE(userinfo, resvdUIChar) : userinfo)
           .append('@');
    }

    if (host != null)
    {
        if (host.indexOf(':') >= 0)
        {
            // a host containing ':' is written in brackets
            out.append('[').append(host).append(']');
        }
        else
        {
            out.append(unescape ? unescapeNoPE(host, resvdHostChar) : host);
        }
    }

    if (port != -1)
    {
        out.append(':').append(port);
    }

    assemblePath(out, false, true, unescape);

    return out.toString();
}
/**
* Returns the URI assembled from its parts in their stored (escaped) form.
*
* @return a string representation of this URI suitable for use in
* links, headers, etc.
*/
public String toExternalForm()
{
return stringify(false);
}
/**
* Return the URI as string. This differs from toExternalForm() in that
* all elements are unescaped before assembly. This is not suitable
* for passing to other apps or in header fields and such, and is usually
* not what you want.
*
* @return the URI as a string
* @see #toExternalForm()
*/
public String toString()
{
return stringify(true);
}
/**
 * Compares this URI with another URI or with a java.net.URL.
 *
 * <p>Two URIs are equal if their schemes are equal and the parts relevant
 * for this URI's type match: the opaque part for opaque URIs; userinfo,
 * host (ignoring case), port and path for semi-generic URIs; and
 * additionally query and fragment for generic URIs. Parts that differ
 * only in their escaping are considered equal.
 *
 * <p>When comparing with a URL, the ports match if they are identical, or
 * if this URI has no explicit port (-1) and the URL names the scheme's
 * default port explicitly.
 *
 * @param other the object to compare with
 * @return true if <var>other</var> is either a URI or URL and it
 *         matches the current URI
 */
public boolean equals(Object other)
{
    if (other instanceof URI)
    {
        URI o = (URI) other;
        return (scheme.equals(o.scheme) &&
                (
                 type == OPAQUE && areEqual(opaque, o.opaque) ||

                 type == SEMI_GENERIC &&
                 areEqual(userinfo, o.userinfo) &&
                 areEqualIC(host, o.host) &&
                 port == o.port &&
                 areEqual(path, o.path) ||

                 type == GENERIC &&
                 areEqual(userinfo, o.userinfo) &&
                 areEqualIC(host, o.host) &&
                 port == o.port &&
                 pathsEqual(path, o.path) &&
                 areEqual(query, o.query) &&
                 areEqual(fragment, o.fragment)
                ));
    }

    if (other instanceof URL)
    {
        URL o = (URL) other;
        String h, f;

        if (userinfo != null)
            h = userinfo + "@" + host;
        else
            h = host;

        f = getPathAndQuery();

        // A default port in the URL may only stand in for a missing port
        // here; an explicit, different port in this URI must not match it.
        int o_port = o.getPort();
        boolean same_port = (port == o_port ||
                             port == -1 && o_port == defaultPort(scheme));

        return (scheme.equalsIgnoreCase(o.getProtocol()) &&
                (type == OPAQUE && opaque.equals(o.getFile()) ||

                 type == SEMI_GENERIC &&
                 areEqualIC(h, o.getHost()) &&
                 same_port &&
                 areEqual(f, o.getFile()) ||

                 type == GENERIC &&
                 areEqualIC(h, o.getHost()) &&
                 same_port &&
                 pathsEqual(f, o.getFile()) &&
                 areEqual(fragment, o.getRef())
                )
               );
    }

    return false;
}
/**
 * Null-safe comparison of two URI parts. Two non-null parts are equal if
 * they are identical strings or if their unescaped forms are identical.
 *
 * @return true if both are null or both denote the same string
 */
private static final boolean areEqual(String s1, String s2)
{
    if (s1 == null || s2 == null)
    {
        return s1 == s2;        // equal only if both are null
    }

    if (s1.equals(s2))
    {
        return true;
    }
    return unescapeNoPE(s1, null).equals(unescapeNoPE(s2, null));
}
/**
 * Null-safe, case-insensitive comparison of two URI parts. Two non-null
 * parts are equal if they, or their unescaped forms, are equal ignoring
 * case.
 *
 * @return true if both are null or both denote the same string ignoring
 *         case
 */
private static final boolean areEqualIC(String s1, String s2)
{
    if (s1 == null || s2 == null)
    {
        return s1 == s2;        // equal only if both are null
    }

    if (s1.equalsIgnoreCase(s2))
    {
        return true;
    }
    return unescapeNoPE(s1, null).equalsIgnoreCase(unescapeNoPE(s2, null));
}
/**
 * Compares two paths segment by segment. The paths are split at '/' and
 * ';'; the delimiters must match exactly, while the segments between them
 * are equal if they are identical or identical after unescaping. This
 * keeps an escaped delimiter (e.g. "%2F") different from a literal one.
 *
 * @param p1 the first path, may be null
 * @param p2 the second path, may be null
 * @return true if both are null or both denote the same path
 */
private static final boolean pathsEqual(String p1, String p2)
{
    if (p1 == null && p2 == null)
        return true;
    if (p1 == null || p2 == null)
        return false;
    if (p1.equals(p2))
        return true;

    // ok, so it wasn't that simple. Let's split into parts and compare
    // unescaped.
    int pos1 = 0, end1 = p1.length(), pos2 = 0, end2 = p2.length();
    while (pos1 < end1 && pos2 < end2)
    {
        int start1 = pos1, start2 = pos2;

        // advance to the end of the current segment in each path
        char ch;
        while (pos1 < end1 && (ch = p1.charAt(pos1)) != '/' && ch != ';')
            pos1++;
        while (pos2 < end2 && (ch = p2.charAt(pos2)) != '/' && ch != ';')
            pos2++;

        // either both paths end here, or both continue with the same delimiter
        if (pos1 == end1 && pos2 < end2 ||
            pos2 == end2 && pos1 < end1 ||
            pos1 < end1 && pos2 < end2 && p1.charAt(pos1) != p2.charAt(pos2))
            return false;

        if ((!p1.regionMatches(start1, p2, start2, pos1-start1) || (pos1-start1) != (pos2-start2)) &&
            !unescapeNoPE(p1.substring(start1, pos1), null).equals(unescapeNoPE(p2.substring(start2, pos2), null)))
            return false;

        // skip the delimiter; if the segment was the last one this moves
        // both positions one past the end
        pos1++;
        pos2++;
    }

    // Both paths must be used up. ">=" rather than "==" because the
    // positions are end+1 when the paths ended in a segment, not a delimiter.
    return (pos1 >= end1 && pos2 >= end2);
}
/** cached hash code; -1 means "not computed yet" */
private int hashCode = -1;

/**
 * Computes the hash code from the unescaped scheme and, depending on the
 * type, from either the unescaped opaque part or the unescaped host
 * (lower-cased), path and query. The result is cached.
 *
 * @return the hash code
 */
public int hashCode()
{
    if (hashCode != -1)
    {
        return hashCode;
    }

    int sum = 0;
    if (scheme != null)
    {
        sum += unescapeNoPE(scheme, null).hashCode();
    }

    if (type == OPAQUE)
    {
        if (opaque != null)
        {
            sum += unescapeNoPE(opaque, null).hashCode() * 7;
        }
    }
    else
    {
        if (host != null)
        {
            sum += unescapeNoPE(host, null).toLowerCase().hashCode() * 7;
        }
        if (path != null)
        {
            sum += unescapeNoPE(path, null).hashCode() * 13;
        }
        if (query != null)
        {
            sum += unescapeNoPE(query, null).hashCode() * 17;
        }
    }

    hashCode = sum;
    return hashCode;
}
/**
* Escape any character not in the given character class. Characters
* greater 255 are always escaped according to ??? .
*
* @param elem the string to escape
* @param allowed_char the BitSet of all allowed characters
* @param utf8 if true, will first UTF-8 encode unallowed characters
* @return the string with all characters not in allowed_char
* escaped
*/
public static String escape(String elem, BitSet allowed_char, boolean utf8)
{
return new String(escape(elem.toCharArray(), allowed_char, utf8));
}
/**
* Escape any character not in the given character class. Characters
* greater 255 are always escaped according to ??? .
*
* @param elem the array of characters to escape
* @param allowed_char the BitSet of all allowed characters
* @param utf8 if true, will first UTF-8 encode unallowed characters
* @return the elem array with all characters not in allowed_char
* escaped
*/
public static char[] escape(char[] elem, BitSet allowed_char, boolean utf8)
{
int cnt=0;
for (int idx=0; idx= 0x0080)
cnt += 3;
if (elem[idx] >= 0x00800)
cnt += 3;
if ((elem[idx] & 0xFC00) == 0xD800 && idx+1 < elem.length &&
(elem[idx+1] & 0xFC00) == 0xDC00)
cnt -= 6;
}
}
}
if (cnt == 0) return elem;
char[] tmp = new char[elem.length + cnt];
for (int idx=0, pos=0; idx> 6) & 0x1F));
pos = enc(tmp, pos, 0x80 | ((c >> 0) & 0x3F));
}
else if (!((c & 0xFC00) == 0xD800 && idx+1 < elem.length &&
(elem[idx+1] & 0xFC00) == 0xDC00))
{
pos = enc(tmp, pos, 0xE0 | ((c >> 12) & 0x0F));
pos = enc(tmp, pos, 0x80 | ((c >> 6) & 0x3F));
pos = enc(tmp, pos, 0x80 | ((c >> 0) & 0x3F));
}
else
{
int ch = ((c & 0x03FF) << 10) | (elem[++idx] & 0x03FF);
ch += 0x10000;
pos = enc(tmp, pos, 0xF0 | ((ch >> 18) & 0x07));
pos = enc(tmp, pos, 0x80 | ((ch >> 12) & 0x3F));
pos = enc(tmp, pos, 0x80 | ((ch >> 6) & 0x3F));
pos = enc(tmp, pos, 0x80 | ((ch >> 0) & 0x3F));
}
}
else
pos = enc(tmp, pos, c);
}
return tmp;
}
private static final char[] hex =
{'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
private static final int enc(char[] out, int pos, int c)
{
out[pos++] = '%';
out[pos++] = hex[(c >> 4) & 0xf];
out[pos++] = hex[c & 0xf];
return pos;
}
/**
* Unescape escaped characters (i.e. %xx) except reserved ones.
*
* @param str the string to unescape
* @param reserved the characters which may not be unescaped, or null
* @return the unescaped string
* @exception ParseException if the two digits following a `%' are
* not a valid hex number
*/
public static final String unescape(String str, BitSet reserved)
throws ParseException
{
if (str == null || str.indexOf('%') == -1)
return str; // an optimization
char[] buf = str.toCharArray();
char[] res = new char[buf.length];
char[] utf = new char[4];
int utf_idx = 0, utf_len = -1;
int didx = 0;
for (int sidx=0; sidx buf.length)
throw new NumberFormatException();
ch = Integer.parseInt(str.substring(sidx+1,sidx+3), 16);
if (ch < 0)
throw new NumberFormatException();
sidx += 2;
}
catch (NumberFormatException e)
{
/* Hmm, people not reading specs again, so we just
* ignore it...
throw new ParseException(str.substring(sidx,sidx+3) +
" is an invalid code");
*/
ch = buf[sidx];
}
// check if we're working on a utf-char
if (utf_len > 0)
{
if ((ch & 0xC0) != 0x80) // oops, we misinterpreted
{
didx = copyBuf(utf, utf_idx, ch, res, didx, reserved, false);
utf_len = -1;
}
else if (utf_idx == utf_len - 1) // end-of-char
{
if ((utf[0] & 0xE0) == 0xC0)
ch = (utf[0] & 0x1F) << 6 |
(ch & 0x3F);
else if ((utf[0] & 0xF0) == 0xE0)
ch = (utf[0] & 0x0F) << 12 |
(utf[1] & 0x3F) << 6 |
(ch & 0x3F);
else
ch = (utf[0] & 0x07) << 18 |
(utf[1] & 0x3F) << 12 |
(utf[2] & 0x3F) << 6 |
(ch & 0x3F);
if (reserved != null && reserved.get(ch))
didx = copyBuf(utf, utf_idx, ch, res, didx, null, true);
else if (utf_len < 4)
res[didx++] = (char) ch;
else
{
ch -= 0x10000;
res[didx++] = (char) ((ch >> 10) | 0xD800);
res[didx++] = (char) ((ch & 0x03FF) | 0xDC00);
}
utf_len = -1;
}
else // continue
utf[utf_idx++] = (char) ch;
}
// check if this is the start of a utf-char
else if ((ch & 0xE0) == 0xC0 || (ch & 0xF0) == 0xE0 ||
(ch & 0xF8) == 0xF0)
{
if ((ch & 0xE0) == 0xC0)
utf_len = 2;
else if ((ch & 0xF0) == 0xE0)
utf_len = 3;
else
utf_len = 4;
utf[0] = (char) ch;
utf_idx = 1;
}
// leave reserved alone
else if (reserved != null && reserved.get(ch))
{
res[didx++] = buf[sidx];
sidx -= 2;
}
// just use the decoded version
else
res[didx++] = (char) ch;
}
else if (utf_len > 0) // oops, we misinterpreted
{
didx = copyBuf(utf, utf_idx, buf[sidx], res, didx, reserved, false);
utf_len = -1;
}
else
res[didx++] = buf[sidx];
}
if (utf_len > 0) // oops, we misinterpreted
didx = copyBuf(utf, utf_idx, -1, res, didx, reserved, false);
return new String(res, 0, didx);
}
private static final int copyBuf(char[] utf, int utf_idx, int ch,
char[] res, int didx, BitSet reserved,
boolean escapeAll)
{
if (ch >= 0)
utf[utf_idx++] = (char) ch;
for (int idx=0; idx" + nl +
" rel-URI = <" + relURI + ">" + nl+
" expected <" + result + ">" + nl+
" but got <" + new URI(base, relURI) + ">");
}
}
/**
 * Test helper: fails unless the two URIs compare equal and have the same
 * hash code.
 */
private static void testEqual(String one, String two) throws Exception
{
    final URI first  = new URI(one);
    final URI second = new URI(two);

    final boolean same = first.equals(second);
    if (!same)
        throw new Exception("Test failed: " + nl +
                            " <" + one + "> != <" + two + ">");

    final int firstHash  = first.hashCode();
    final int secondHash = second.hashCode();
    if (firstHash != secondHash)
        throw new Exception("Test failed: " + nl +
                            " hashCode <" + one + "> != hashCode <" + two + ">");
}
/**
 * Test helper: fails if the two URIs compare equal.
 */
private static void testNotEqual(String one, String two) throws Exception
{
    final URI first  = new URI(one);
    final URI second = new URI(two);

    final boolean same = first.equals(second);
    if (same)
        throw new Exception("Test failed: " + nl +
                            " <" + one + "> == <" + two + ">");
}
/**
 * Test helper: fails unless parsing the given URI relative to the base
 * raises a ParseException.
 */
private static void testPE(URI base, String uri) throws Exception
{
    try
    {
        new URI(base, uri);
    }
    catch (ParseException expected)
    {
        return;         // the URI was rejected, as it should be
    }

    throw new Exception("Test failed: " + nl +
                        " <" + uri + "> should be invalid");
}
/**
 * Test helper: fails unless escaping <var>raw</var> (UTF-8 mode, uric
 * characters allowed) yields <var>escaped</var>.
 */
private static void testEscape(String raw, String escaped) throws Exception
{
    final String test = escape(raw, uricChar, true);
    if (test.equals(escaped))
        return;

    throw new Exception("Test failed: " + nl +
                        " raw-string: " + raw + nl +
                        " escaped: " + test + nl +
                        " expected: " + escaped);
}
/**
 * Test helper: fails unless unescaping <var>escaped</var> (nothing
 * reserved) yields <var>raw</var>.
 */
private static void testUnescape(String escaped, String raw)
        throws Exception
{
    final String unescaped = unescape(escaped, null);
    if (unescaped.equals(raw))
        return;

    throw new Exception("Test failed: " + nl +
                        " escaped-string: " + escaped + nl +
                        " unescaped: " + unescaped + nl +
                        " expected: " + raw);
}
}