HTTPClient.URI Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of grinder-httpclient Show documentation
Modified version of HTTPClient used by The Grinder. The original can be found at http://www.innovation.ch/java/HTTPClient/.
There is a newer version: 3.11
Show newest version
/*
 * @(#)URI.java						0.3-3 06/05/2001
 *
 *  This file is part of the HTTPClient package
 *  Copyright (C) 1996-2001 Ronald Tschalär
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free
 *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA 02111-1307, USA
 *
 *  For questions, suggestions, bug-reports, enhancement-requests etc.
 *  I may be contacted at:
 *
 *  [email protected]
 *
 *  The HTTPClient's home page is located at:
 *
 *  http://www.innovation.ch/java/HTTPClient/ 
 *
 */

package HTTPClient;

import java.net.URL;
import java.net.MalformedURLException;
import java.util.BitSet;
import java.util.Hashtable;

/**
 * This class represents a generic URI, as defined in RFC-2396.
 * This is similar to java.net.URL, with the following enhancements:
 * 
 * <UL>
 * <LI>it doesn't require a URLStreamHandler to exist for the scheme; this
 *     allows this class to be used to hold any URI, construct absolute
 *     URIs from relative ones, etc.
 * <LI>it handles escapes correctly
 * <LI>equals() works correctly
 * <LI>relative URIs are correctly constructed
 * <LI>it has methods for accessing various fields such as userinfo,
 *     fragment, params, etc.
 * <LI>it handles less common forms of resources such as the "*" used in
 *     http URLs.
 * </UL>
 * 
 *
 * The elements are always stored in escaped form.
 *
 * 
 * <P>While RFC-2396 distinguishes between just two forms of URIs, those that
 * follow the generic syntax and those that don't, this class knows about a
 * third form, named semi-generic, used by quite a few popular schemes.
 * Semi-generic syntax treats the path part as opaque, i.e. has the form
 * <code>&lt;scheme&gt;://&lt;authority&gt;/&lt;opaque&gt;</code>. Relative
 * URIs of this type are only resolved as far as absolute paths - relative
 * paths do not exist.
 *
 * <P>Ideally, java.net.URL should subclass URI.
 *
 * @see		rfc-2396
 * @version	0.3-3  06/05/2001
 * @author	Ronald Tschalär
 * @since	V0.3-1
 */
public class URI
{
    /**
     * If true, then the parser will resolve certain URIs in a backwards
     * compatible (but technically incorrect) manner. Example:
     *
     * <PRE>
     * base   = http://a/b/c/d;p?q
     * rel    = http:g
     * result = http:g		(correct)
     * result = http://a/b/c/g	(backwards compatible)
     * </PRE>
     *
     * The flag is consulted when a URI is parsed relative to a base URI
     * and the relative URI carries the same scheme as the base.
     *
     * See rfc-2396, section 5.2, step 3, second paragraph.
     */
    public static final boolean ENABLE_BACKWARDS_COMPATIBILITY = true;

    /** maps a lower-case scheme name to its default port (an Integer) */
    protected static final Hashtable defaultPorts          = new Hashtable();
    /** the keys are the lower-case names of the schemes parsed as generic URIs */
    protected static final Hashtable usesGenericSyntax     = new Hashtable();
    /** the keys are the lower-case names of the schemes parsed as semi-generic URIs */
    protected static final Hashtable usesSemiGenericSyntax = new Hashtable();

    /* Various character classes as defined in the draft. Each BitSet is
     * indexed by character code; a set bit means the character belongs to
     * the class. All of them are filled in by the static initializer.
     */
    protected static final BitSet alphanumChar;
    protected static final BitSet markChar;
    protected static final BitSet reservedChar;
    protected static final BitSet unreservedChar;
    protected static final BitSet uricChar;
    protected static final BitSet pcharChar;
    protected static final BitSet userinfoChar;
    protected static final BitSet schemeChar;
    protected static final BitSet hostChar;
    protected static final BitSet opaqueChar;
    protected static final BitSet reg_nameChar;

    /* These are not directly in the spec, but used for escaping and
     * unescaping parts
     */

    /** list of characters which must not be unescaped when unescaping a scheme */
    public static final BitSet resvdSchemeChar;
    /** list of characters which must not be unescaped when unescaping a userinfo */
    public static final BitSet resvdUIChar;
    /** list of characters which must not be unescaped when unescaping a host */
    public static final BitSet resvdHostChar;
    /** list of characters which must not be unescaped when unescaping a path */
    public static final BitSet resvdPathChar;
    /** list of characters which must not be unescaped when unescaping a query string */
    public static final BitSet resvdQueryChar;
    /** list of characters which must not be escaped when escaping a path */
    public static final BitSet escpdPathChar;
    /** list of characters which must not be escaped when escaping a query string */
    public static final BitSet escpdQueryChar;
    /** list of characters which must not be escaped when escaping a fragment identifier */
    public static final BitSet escpdFragChar;

    static
    {
	defaultPorts.put("http",      new Integer(80));
	defaultPorts.put("shttp",     new Integer(80));
	defaultPorts.put("http-ng",   new Integer(80));
	defaultPorts.put("coffee",    new Integer(80));
	defaultPorts.put("https",     new Integer(443));
	defaultPorts.put("ftp",       new Integer(21));
	defaultPorts.put("telnet",    new Integer(23));
	defaultPorts.put("nntp",      new Integer(119));
	defaultPorts.put("news",      new Integer(119));
	defaultPorts.put("snews",     new Integer(563));
	defaultPorts.put("hnews",     new Integer(80));
	defaultPorts.put("smtp",      new Integer(25));
	defaultPorts.put("gopher",    new Integer(70));
	defaultPorts.put("wais",      new Integer(210));
	defaultPorts.put("whois",     new Integer(43));
	defaultPorts.put("whois++",   new Integer(63));
	defaultPorts.put("rwhois",    new Integer(4321));
	defaultPorts.put("imap",      new Integer(143));
	defaultPorts.put("pop",       new Integer(110));
	defaultPorts.put("prospero",  new Integer(1525));
	defaultPorts.put("irc",       new Integer(194));
	defaultPorts.put("ldap",      new Integer(389));
	defaultPorts.put("nfs",       new Integer(2049));
	defaultPorts.put("z39.50r",   new Integer(210));
	defaultPorts.put("z39.50s",   new Integer(210));
	defaultPorts.put("vemmi",     new Integer(575));
	defaultPorts.put("videotex",  new Integer(516));
	defaultPorts.put("cmp",       new Integer(829));

	usesGenericSyntax.put("http", Boolean.TRUE);
	usesGenericSyntax.put("https", Boolean.TRUE);
	usesGenericSyntax.put("shttp", Boolean.TRUE);
	usesGenericSyntax.put("coffee", Boolean.TRUE);
	usesGenericSyntax.put("ftp", Boolean.TRUE);
	usesGenericSyntax.put("file", Boolean.TRUE);
	usesGenericSyntax.put("nntp", Boolean.TRUE);
	usesGenericSyntax.put("news", Boolean.TRUE);
	usesGenericSyntax.put("snews", Boolean.TRUE);
	usesGenericSyntax.put("hnews", Boolean.TRUE);
	usesGenericSyntax.put("imap", Boolean.TRUE);
	usesGenericSyntax.put("wais", Boolean.TRUE);
	usesGenericSyntax.put("nfs", Boolean.TRUE);
	usesGenericSyntax.put("sip", Boolean.TRUE);
	usesGenericSyntax.put("sips", Boolean.TRUE);
	usesGenericSyntax.put("sipt", Boolean.TRUE);
	usesGenericSyntax.put("sipu", Boolean.TRUE);
	/* Note: schemes which definitely don't use the generic-URI syntax
	 * and must therefore never appear in the above list:
	 * "urn", "mailto", "sdp", "service", "tv", "gsm-sms", "tel", "fax",
	 * "modem", "eid", "cid", "mid", "data", "ldap"
	 */

	usesSemiGenericSyntax.put("ldap", Boolean.TRUE);
	usesSemiGenericSyntax.put("irc", Boolean.TRUE);
	usesSemiGenericSyntax.put("gopher", Boolean.TRUE);
	usesSemiGenericSyntax.put("videotex", Boolean.TRUE);
	usesSemiGenericSyntax.put("rwhois", Boolean.TRUE);
	usesSemiGenericSyntax.put("whois++", Boolean.TRUE);
	usesSemiGenericSyntax.put("smtp", Boolean.TRUE);
	usesSemiGenericSyntax.put("telnet", Boolean.TRUE);
	usesSemiGenericSyntax.put("prospero", Boolean.TRUE);
	usesSemiGenericSyntax.put("pop", Boolean.TRUE);
	usesSemiGenericSyntax.put("vemmi", Boolean.TRUE);
	usesSemiGenericSyntax.put("z39.50r", Boolean.TRUE);
	usesSemiGenericSyntax.put("z39.50s", Boolean.TRUE);
	usesSemiGenericSyntax.put("stream", Boolean.TRUE);
	usesSemiGenericSyntax.put("cmp", Boolean.TRUE);

	alphanumChar = new BitSet(128);
	for (int ch='0'; ch<='9'; ch++)  alphanumChar.set(ch);
	for (int ch='A'; ch<='Z'; ch++)  alphanumChar.set(ch);
	for (int ch='a'; ch<='z'; ch++)  alphanumChar.set(ch);

	markChar = new BitSet(128);
	markChar.set('-');
	markChar.set('_');
	markChar.set('.');
	markChar.set('!');
	markChar.set('~');
	markChar.set('*');
	markChar.set('\'');
	markChar.set('(');
	markChar.set(')');

	reservedChar = new BitSet(128);
	reservedChar.set(';');
	reservedChar.set('/');
	reservedChar.set('?');
	reservedChar.set(':');
	reservedChar.set('@');
	reservedChar.set('&');
	reservedChar.set('=');
	reservedChar.set('+');
	reservedChar.set('$');
	reservedChar.set(',');

	unreservedChar = new BitSet(128);
	unreservedChar.or(alphanumChar);
	unreservedChar.or(markChar);

	uricChar = new BitSet(128);
	uricChar.or(unreservedChar);
	uricChar.or(reservedChar);
	uricChar.set('%');

	pcharChar = new BitSet(128);
	pcharChar.or(unreservedChar);
	pcharChar.set('%');
	pcharChar.set(':');
	pcharChar.set('@');
	pcharChar.set('&');
	pcharChar.set('=');
	pcharChar.set('+');
	pcharChar.set('$');
	pcharChar.set(',');

	userinfoChar = new BitSet(128);
	userinfoChar.or(unreservedChar);
	userinfoChar.set('%');
	userinfoChar.set(';');
	userinfoChar.set(':');
	userinfoChar.set('&');
	userinfoChar.set('=');
	userinfoChar.set('+');
	userinfoChar.set('$');
	userinfoChar.set(',');

	// this actually shouldn't contain uppercase letters...
	schemeChar = new BitSet(128);
	schemeChar.or(alphanumChar);
	schemeChar.set('+');
	schemeChar.set('-');
	schemeChar.set('.');

	opaqueChar = new BitSet(128);
	opaqueChar.or(uricChar);

	hostChar = new BitSet(128);
	hostChar.or(alphanumChar);
	hostChar.set('-');
	hostChar.set('.');

	reg_nameChar = new BitSet(128);
	reg_nameChar.or(unreservedChar);
	reg_nameChar.set('$');
	reg_nameChar.set(',');
	reg_nameChar.set(';');
	reg_nameChar.set(':');
	reg_nameChar.set('@');
	reg_nameChar.set('&');
	reg_nameChar.set('=');
	reg_nameChar.set('+');

	resvdSchemeChar = new BitSet(128);
	resvdSchemeChar.set(':');

	resvdUIChar = new BitSet(128);
	resvdUIChar.set('@');

	resvdHostChar = new BitSet(128);
	resvdHostChar.set(':');
	resvdHostChar.set('/');
	resvdHostChar.set('?');
	resvdHostChar.set('#');

	resvdPathChar = new BitSet(128);
	resvdPathChar.set('/');
	resvdPathChar.set(';');
	resvdPathChar.set('?');
	resvdPathChar.set('#');

	resvdQueryChar = new BitSet(128);
	resvdQueryChar.set('#');

	escpdPathChar = new BitSet(128);
	escpdPathChar.or(pcharChar);
	escpdPathChar.set('%');
	escpdPathChar.set('/');
	escpdPathChar.set(';');

	escpdQueryChar = new BitSet(128);
	escpdQueryChar.or(uricChar);
	escpdQueryChar.clear('#');

	escpdFragChar = new BitSet(128);
	escpdFragChar.or(uricChar);
    }


    /* our uri in pieces */

    /* the three syntax families a URI can belong to; stored in 'type' */
    protected static final int OPAQUE       = 0;
    protected static final int SEMI_GENERIC = 1;
    protected static final int GENERIC      = 2;

    /* one of OPAQUE, SEMI_GENERIC or GENERIC */
    protected int     type;
    /* the scheme, lower-cased by the constructors */
    protected String  scheme;
    /* the scheme specific part of an opaque URI; stays null otherwise */
    protected String  opaque;
    protected String  userinfo;
    protected String  host;
    /* -1 unless a port other than the scheme's default was given */
    protected int     port = -1;
    protected String  path;
    protected String  query;
    protected String  fragment;


    /* cache the java.net.URL; filled in by the first successful toURL() */

    protected URL     url = null;


    // Constructors

    /**
     * Constructs a URI from the given string representation. The string
     * must be an absolute URI. This simply delegates to
     * <code>URI(URI, String)</code> with a null base.
     *
     * @param uri a String containing an absolute URI; must not be null
     * @exception ParseException if no scheme can be found or a specified
     *                           port cannot be parsed as a number
     */
    public URI(String uri)  throws ParseException
    {
	this((URI) null, uri);
    }


    /**
     * Constructs a URI from the given string representation, relative to
     * the given base URI.
     *
     * <P>Leading and trailing whitespace in <var>rel_uri</var> is ignored
     * for all three kinds of URI (opaque, semi-generic and generic), and a
     * leading "url:" or "uri:" prefix is stripped before parsing.
     *
     * @param base    the base URI, relative to which <var>rel_uri</var>
     *                is to be parsed; may be null
     * @param rel_uri a String containing a relative or absolute URI; must
     *                not be null
     * @exception ParseException if <var>base</var> is null and
     *                           <var>rel_uri</var> is not an absolute URI, or
     *                           if <var>base</var> is not null and the scheme
     *                           is not known to use the generic syntax, or
     *                           if a given port cannot be parsed as a number
     */
    public URI(URI base, String rel_uri)  throws ParseException
    {
	/* Parsing is done according to the following RE:
	 *
	 *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
	 *   12            3  4          5       6  7        8 9
	 *
	 * 2: scheme
	 * 4: authority
	 * 5: path
	 * 7: query
	 * 9: fragment
	 */

	char[] uri = rel_uri.toCharArray();
	int pos = 0, idx, len = uri.length;


	// trim(): afterwards [pos, len) is the part that still has to be
	// parsed. The second loop stops at pos so that len can never drop
	// below pos for an all-whitespace string.

	while (pos < len  &&  Character.isWhitespace(uri[pos]))    pos++;
	while (len > pos  &&  Character.isWhitespace(uri[len-1]))  len--;


	// strip the special "url" or "uri" scheme

	if (pos < len-3  &&  uri[pos+3] == ':'  &&
	    (uri[pos+0] == 'u'  ||  uri[pos+0] == 'U')  &&
	    (uri[pos+1] == 'r'  ||  uri[pos+1] == 'R')  &&
	    (uri[pos+2] == 'i'  ||  uri[pos+2] == 'I'  ||
	     uri[pos+2] == 'l'  ||  uri[pos+2] == 'L'))
	    pos += 4;


	// get scheme: (([^:/?#]+):)?

	idx = pos;
	while (idx < len  &&  uri[idx] != ':'  &&  uri[idx] != '/'  &&
	       uri[idx] != '?'  &&  uri[idx] != '#')
	    idx++;
	if (idx < len  &&  uri[idx] == ':')
	{
	    scheme = rel_uri.substring(pos, idx).trim().toLowerCase();
	    pos = idx + 1;
	}


	// check and resolve scheme

	String final_scheme = scheme;
	if (scheme == null)
	{
	    if (base == null)
		throw new ParseException("No scheme found");
	    final_scheme = base.scheme;
	}


	// check for generic vs. opaque

	type = usesGenericSyntax(final_scheme) ? GENERIC :
	       usesSemiGenericSyntax(final_scheme) ? SEMI_GENERIC : OPAQUE;
	if (type == OPAQUE)
	{
	    if (base != null  &&  scheme == null)
		throw new ParseException("Can't resolve relative URI for " +
					 "scheme " + final_scheme);

	    // stop at len so that trimmed trailing whitespace isn't escaped
	    // into the opaque part
	    opaque = escape(rel_uri.substring(pos, len), opaqueChar, true);
	    if (opaque.length() > 0  &&  opaque.charAt(0) == '/')
		opaque = "%2F" + opaque.substring(1);
	    return;
	}


	// get authority: (//([^/?#]*))?

	if (pos+1 < len  &&  uri[pos] == '/'  &&  uri[pos+1] == '/')
	{
	    pos += 2;
	    idx = pos;
	    while (idx < len  &&  uri[idx] != '/'  &&  uri[idx] != '?'  &&
		   uri[idx] != '#')
		idx++;

	    parse_authority(rel_uri.substring(pos, idx), final_scheme);
	    pos = idx;
	}


	// handle semi-generic and generic uri's

	if (type == SEMI_GENERIC)
	{
	    // everything after the authority is one opaque path; again
	    // stop at len to honour the trim above
	    path = escape(rel_uri.substring(pos, len), uricChar, true);
	    if (path.length() > 0  &&  path.charAt(0) != '/')
		path = '/' + path;
	}
	else
	{
	    // get path: ([^?#]*)

	    idx = pos;
	    while (idx < len  &&  uri[idx] != '?'  &&  uri[idx] != '#')
		idx++;
	    path = escape(rel_uri.substring(pos, idx), escpdPathChar, true);
	    pos = idx;


	    // get query: (\?([^#]*))?

	    if (pos < len  &&  uri[pos] == '?')
	    {
		pos += 1;
		idx = pos;
		while (idx < len  &&  uri[idx] != '#')
		    idx++;
		this.query = escape(rel_uri.substring(pos, idx), escpdQueryChar, true);
		pos = idx;
	    }


	    // get fragment: (#(.*))?

	    if (pos < len  &&  uri[pos] == '#')
		this.fragment = escape(rel_uri.substring(pos+1, len), escpdFragChar, true);
	}


	// now resolve the parts relative to the base

	if (base != null)
	{
	    // an explicit scheme makes the URI absolute, unless it equals
	    // the base's scheme and backwards compatibility is enabled
	    if (scheme != null  &&			// resolve scheme
		!(scheme.equals(base.scheme)  &&  ENABLE_BACKWARDS_COMPATIBILITY))
	      return;
	    scheme = base.scheme;

	    if (host != null)				// resolve authority
		return;
	    userinfo = base.userinfo;
	    host     = base.host;
	    port     = base.port;

	    if (type == SEMI_GENERIC)			// can't resolve relative paths
		return;

	    if (path.length() == 0  &&  query == null)	// current doc
	    {
		path  = base.path;
		query = base.query;
		return;
	    }

	    if (path.length() == 0  ||  path.charAt(0) != '/')	// relative path
	    {
		// replace everything after the last '/' of the base path
		idx = (base.path != null) ? base.path.lastIndexOf('/') : -1;
		if (idx < 0)
		    path = '/' + path;
		else
		    path = base.path.substring(0, idx+1) + path;

		path = canonicalizePath(path);
	    }
	}
    }

    /**
     * Remove all "/../" and "/./" from path, where possible. Leading "/../"'s
     * are not removed.
     *
     * @param path the path to canonicalize
     * @return the canonicalized path
     */
    public static String canonicalizePath(String path)
    {
	int idx, len = path.length();
	if (!((idx = path.indexOf("/.")) != -1  &&
	      (idx == len-2  ||  path.charAt(idx+2) == '/'  ||
	       (path.charAt(idx+2) == '.'  &&
		(idx == len-3  ||  path.charAt(idx+3) == '/')) )))
	    return path;

	char[] p = new char[path.length()];		// clean path
	path.getChars(0, p.length, p, 0);

	int beg = 0;
	for (idx=1; idx beg  &&  p[end] != '/')  end--;
		    if (p[end] != '/')  continue;
		    if (idx == len-2) end++;
		    idx += 2;
		}
		else
		    continue;
		System.arraycopy(p, idx, p, end, len-idx);
		len -= idx - end;
		idx = end;
	    }
	}

	return new String(p, 0, len);
    }

    /**
     * Parse the authority specific part and store the pieces found in
     * the userinfo, host and port fields.
     *
     * @param authority the authority component, without the leading "//"
     * @param scheme    the scheme of the URI; used to recognise the default
     *                  port, which is stored as -1
     * @exception ParseException if a '[' is not matched by a ']' or if the
     *                           port is not a non-negative number
     */
    private void parse_authority(String authority, String scheme)
	    throws ParseException
    {
	/* The authority has the layout
	 *
	 *   [ userinfo "@" ] ( "[" ipv6-address "]" | host ) [ ":" port ]
	 */

	final int end = authority.length();
	int cursor = 0;


	// userinfo is whatever precedes the first '@'

	int at = authority.indexOf('@');
	if (at >= 0)
	{
	    this.userinfo = escape(authority.substring(0, at), userinfoChar, true);
	    cursor = at + 1;
	}


	// the host is either a bracketed IPv6 literal or runs up to the
	// next ':'

	int hostEnd;
	if (cursor < end  &&  authority.charAt(cursor) == '[')
	{
	    int close = authority.indexOf(']', cursor);
	    if (close < 0)
		throw new ParseException("No closing ']' found for opening '['"+
					 " at position " + cursor +
					 " in authority `" + authority + "'");
	    this.host = authority.substring(cursor+1, close);
	    hostEnd = close + 1;
	}
	else
	{
	    hostEnd = authority.indexOf(':', cursor);
	    if (hostEnd < 0)
		hostEnd = end;
	    this.host = escape(authority.substring(cursor, hostEnd), uricChar, true);
	}


	// a port needs a ':' followed by at least one more character

	if (hostEnd >= end-1  ||  authority.charAt(hostEnd) != ':')
	    return;

	String portText = authority.substring(hostEnd+1, end);
	int p;
	try
	{
	    p = Integer.parseInt(unescape(portText, null));
	    if (p < 0)  throw new NumberFormatException();
	}
	catch (NumberFormatException e)
	{
	    throw new ParseException(portText + " is an invalid port number");
	}

	// the default port is never stored explicitly
	this.port = (p == defaultPort(scheme)) ? -1 : p;
    }


    /**
     * Construct a URI from the given URL. The URL's external form is
     * parsed as an absolute URI.
     *
     * @param url the URL; must not be null
     * @exception ParseException if <code>url.toExternalForm()</code> generates
     *                           an invalid string representation
     */
    public URI(URL url)  throws ParseException
    {
	this((URI) null, url.toExternalForm());
    }


    /**
     * Constructs a URI from the given parts, using the default port for
     * this scheme (if known). The parts must be in unescaped form. The
     * userinfo, query and fragment are left unset.
     *
     * @param scheme the scheme (sometimes known as protocol)
     * @param host   the host
     * @param path   the path part
     * @exception ParseException if <var>scheme</var> is null
     */
    public URI(String scheme, String host, String path)  throws ParseException
    {
	this(scheme, null, host, -1, path, null, null);
    }


    /**
     * Constructs a URI from the given parts. The parts must be in unescaped
     * form. The userinfo, query and fragment are left unset.
     *
     * @param scheme the scheme (sometimes known as protocol)
     * @param host   the host
     * @param port   the port
     * @param path   the path part
     * @exception ParseException if <var>scheme</var> is null
     */
    public URI(String scheme, String host, int port, String path)
	    throws ParseException
    {
	this(scheme, null, host, port, path, null, null);
    }


    /**
     * Constructs a URI from the given parts. Any part except for the
     * the scheme may be null. The parts must be in unescaped form; each
     * one is trimmed and escaped before being stored.
     *
     * @param scheme   the scheme (sometimes known as protocol)
     * @param userinfo the userinfo
     * @param host     the host
     * @param port     the port
     * @param path     the path part
     * @param query    the query string
     * @param fragment the fragment identifier
     * @exception ParseException if <var>scheme</var> is null
     */
    public URI(String scheme, String userinfo, String host, int port,
	       String path, String query, String fragment)
	    throws ParseException
    {
	if (scheme == null)
	    throw new ParseException("missing scheme");

	this.scheme = escape(scheme.trim().toLowerCase(), schemeChar, true);

	this.userinfo = (userinfo == null) ? null :
			escape(userinfo.trim(), userinfoChar, true);

	if (host != null)
	{
	    // an IPv6 literal is stored as is, anything else gets escaped
	    String trimmed = host.trim();
	    if (isIPV6Addr(trimmed))
		this.host = trimmed;
	    else
		this.host = escape(trimmed, hostChar, true);
	}

	// the scheme's default port is represented by -1
	this.port = (port == defaultPort(scheme)) ? -1 : port;

	this.path = (path == null) ? null :
		    escape(path.trim(), escpdPathChar, true);	// ???
	this.query = (query == null) ? null :
		     escape(query.trim(), escpdQueryChar, true);
	this.fragment = (fragment == null) ? null :
			escape(fragment.trim(), escpdFragChar, true);

	if (usesGenericSyntax(scheme))
	    type = GENERIC;
	else
	    type = SEMI_GENERIC;
    }

    private static final boolean isIPV6Addr(String host)
    {
	if (host.indexOf(':') < 0)
	    return false;

	for (int idx=0; idx '9')  &&  ch != ':')
		return false;
	}

	return true;
    }


    /**
     * Constructs an opaque URI from the given parts. Both parts are
     * escaped before being stored.
     *
     * @param scheme the scheme (sometimes known as protocol)
     * @param opaque the opaque part
     * @exception ParseException if <var>scheme</var> is null
     */
    public URI(String scheme, String opaque)
	    throws ParseException
    {
	if (scheme == null)
	{
	    throw new ParseException("missing scheme");
	}

	String normalized = scheme.trim().toLowerCase();
	this.scheme = escape(normalized, schemeChar, true);
	this.opaque = escape(opaque, opaqueChar, true);
	this.type   = OPAQUE;
    }


    // Class Methods

    /**
     * Looks up the scheme, trimmed and lower-cased, in the table of
     * schemes which use the generic-URI syntax.
     *
     * @param scheme the scheme name; must not be null
     * @return true if the scheme should be parsed according to the
     *         generic-URI syntax
     */
    public static boolean usesGenericSyntax(String scheme)
    {
	return usesGenericSyntax.containsKey(scheme.trim().toLowerCase());
    }


    /**
     * Looks up the scheme, trimmed and lower-cased, in the table of
     * schemes which use the semi-generic-URI syntax.
     *
     * @param scheme the scheme name; must not be null
     * @return true if the scheme should be parsed according to a
     *         semi-generic-URI syntax
     *         &lt;scheme&gt;://&lt;hostport&gt;/&lt;opaque&gt;
     */
    public static boolean usesSemiGenericSyntax(String scheme)
    {
	return usesSemiGenericSyntax.containsKey(scheme.trim().toLowerCase());
    }


    /**
     * Return the default port used by a given protocol. The protocol name
     * is trimmed and lower-cased before it is looked up.
     *
     * @param protocol the protocol; must not be null
     * @return the port number, or 0 if unknown
     */
    public final static int defaultPort(String protocol)
    {
	String key = protocol.trim().toLowerCase();
	Object known = defaultPorts.get(key);
	if (known == null)
	    return 0;
	return ((Integer) known).intValue();
    }


    // Instance Methods

    /**
     * Returns the scheme of this URI as stored, i.e. in escaped form.
     *
     * @return the scheme (often also referred to as protocol)
     */
    public String getScheme()
    {
	return scheme;
    }


    /**
     * Returns the scheme specific part of an opaque URI as stored, i.e.
     * in escaped form.
     *
     * @return the opaque part, or null if this URI is generic
     */
    public String getOpaque()
    {
	return opaque;
    }


    /**
     * Returns the host as stored. An IPv6 address is stored without the
     * enclosing brackets.
     *
     * @return the host, or null if there is none
     */
    public String getHost()
    {
	return host;
    }


    /**
     * Returns the port as stored. The constructors store -1 when no port
     * was given or when the given port equals the default port known for
     * the scheme.
     *
     * @return the port, or -1 if it's the default port or none was given
     */
    public int getPort()
    {
	return port;
    }


    /**
     * Returns the userinfo as stored, i.e. in escaped form.
     *
     * @return the user info, or null if there is none
     */
    public String getUserinfo()
    {
	return userinfo;
    }


    /**
     * Returns the path as stored, i.e. in escaped form.
     *
     * @return the path; may be null
     */
    public String getPath()
    {
	return path;
    }


    /**
     * Returns the query string as stored, i.e. in escaped form and without
     * the leading '?'.
     *
     * @return the query string, or null if there is none
     */
    public String getQueryString()
    {
	return query;
    }


    /**
     * Returns the path followed by '?' and the query string. If there is
     * no query then just the path is returned (which may be null); if there
     * is no path then the result starts with the '?'.
     *
     * @return the path and query
     */
    public String getPathAndQuery()
    {
	if (query == null)
	    return path;

	String prefix = (path == null) ? "" : path;
	return prefix + "?" + query;
    }


    /**
     * Returns the fragment identifier as stored, i.e. in escaped form and
     * without the leading '#'.
     *
     * @return the fragment, or null if there is none
     */
    public String getFragment()
    {
	return fragment;
    }


    /**
     * Does the scheme specific part of this URI use the generic-URI syntax?
     *
     * <P>In general URIs are split into two categories: opaque-URI and
     * generic-URI. The generic-URI syntax is the syntax most are familiar
     * with from URLs such as ftp- and http-URLs, which is roughly:
     * <PRE>
     * generic-URI = scheme ":" [ "//" server ] [ "/" ] [ path_segments ] [ "?" query ]
     * </PRE>
     * (see RFC-2396 for exact syntax). Only URLs using the generic-URI syntax
     * can be used to create and resolve relative URIs.
     *
     * <P>Whether a given scheme is parsed according to the generic-URI
     * syntax or whether it is treated as opaque is determined by an internal
     * table of URI schemes.
     *
     * @return true if this URI was parsed as a generic URI
     * @see rfc-2396
     */
    public boolean isGenericURI()
    {
	return (type == GENERIC);
    }

    /**
     * Does the scheme specific part of this URI use the semi-generic-URI syntax?
     *
     * <P>Many schemes which don't follow the full generic syntax actually
     * follow a reduced form where the path part is treated as opaque. This
     * is used for example by ldap, smtp, pop, etc, and is roughly
     * <PRE>
     * semi-generic-URI = scheme ":" [ "//" server ] [ "/" [ opaque_path ] ]
     * </PRE>
     * I.e. parsing is identical to the generic-syntax, except that the path
     * part is not further parsed. URLs using the semi-generic-URI syntax can
     * be used to create and resolve relative URIs with the restriction that
     * all paths are treated as absolute.
     *
     * <P>Whether a given scheme is parsed according to the semi-generic-URI
     * syntax is determined by an internal table of URI schemes.
     *
     * @return true if this URI was parsed as a semi-generic URI
     * @see #isGenericURI()
     */
    public boolean isSemiGenericURI()
    {
	return (type == SEMI_GENERIC);
    }


    /**
     * Will try to create a <code>java.net.URL</code> object from this URI.
     * The URL is created on the first successful call and the same object
     * is returned by later calls.
     *
     * @return the URL
     * @exception MalformedURLException if no handler is available for the
     *            scheme
     */
    public URL toURL()  throws MalformedURLException
    {
	if (url == null)
	{
	    if (opaque != null)
		url = new URL(scheme + ":" + opaque);
	    else
	    {
		// the userinfo, if any, is passed along as part of the host
		String hostinfo = host;
		if (userinfo != null)
		    hostinfo = (host != null) ? userinfo + "@" + host
					      : userinfo + "@";

		StringBuffer file = new StringBuffer(100);
		assemblePath(file, true, true, false);

		url = new URL(scheme, hostinfo, port, file.toString());
	    }
	}

	return url;
    }


    /**
     * Appends the path, query and fragment of this URI to the given buffer.
     *
     * @param buf         the buffer to append to
     * @param printEmpty  if true, a "/" is written when the path is null
     *                    or empty
     * @param incFragment if true, the fragment (if any) is appended too
     * @param unescape    if true, each part is run through unescapeNoPE()
     *                    before it is appended
     */
    private final void assemblePath(StringBuffer buf, boolean printEmpty,
				    boolean incFragment, boolean unescape)
    {
	boolean emptyPath = (path == null  ||  path.length() == 0);
	if (printEmpty  &&  emptyPath)
	    buf.append('/');

	if (path != null)
	{
	    if (unescape)
		buf.append(unescapeNoPE(path, resvdPathChar));
	    else
		buf.append(path);
	}

	if (query != null)
	{
	    buf.append('?');
	    if (unescape)
		buf.append(unescapeNoPE(query, resvdQueryChar));
	    else
		buf.append(query);
	}

	if (incFragment  &&  fragment != null)
	{
	    buf.append('#');
	    if (unescape)
		buf.append(unescapeNoPE(fragment, null));
	    else
		buf.append(fragment);
	}
    }


    /**
     * Builds the string form of this URI from its parts.
     *
     * @param unescape if true, the parts are run through unescapeNoPE()
     *                 before being assembled; if false they are used as
     *                 stored
     * @return the assembled URI
     */
    private final String stringify(boolean unescape)
    {
	StringBuffer out = new StringBuffer(100);

	if (scheme != null)
	{
	    if (unescape)
		out.append(unescapeNoPE(scheme, resvdSchemeChar));
	    else
		out.append(scheme);
	    out.append(':');
	}

	// an opaque-uri consists of just the scheme and the opaque part
	if (opaque != null)
	{
	    if (unescape)
		out.append(unescapeNoPE(opaque, null));
	    else
		out.append(opaque);
	    return out.toString();
	}

	boolean hasAuthority = (userinfo != null  ||  host != null  ||
				port != -1);
	if (hasAuthority)
	    out.append("//");

	if (userinfo != null)
	{
	    if (unescape)
		out.append(unescapeNoPE(userinfo, resvdUIChar));
	    else
		out.append(userinfo);
	    out.append('@');
	}

	if (host != null)
	{
	    if (host.indexOf(':') >= 0)		// IPv6 address
		out.append('[').append(host).append(']');
	    else if (unescape)
		out.append(unescapeNoPE(host, resvdHostChar));
	    else
		out.append(host);
	}

	if (port != -1)
	    out.append(':').append(port);

	assemblePath(out, false, true, unescape);

	return out.toString();
    }


    /**
     * Returns the URI assembled from its parts exactly as they are stored,
     * i.e. without unescaping anything.
     *
     * @return a string representation of this URI suitable for use in
     *         links, headers, etc.
     */
    public String toExternalForm()
    {
	return stringify(false);
    }


    /**
     * Return the URI as string. This differs from toExternalForm() in that
     * all elements are unescaped before assembly. This is <em>not</em>
     * suitable for passing to other apps or in header fields and such, and
     * is usually not what you want.
     *
     * @return the URI as a string
     * @see #toExternalForm()
     */
    public String toString()
    {
	return stringify(true);
    }


    /**
     * Compares this URI with another URI or with a java.net.URL.
     *
     * <P>Two URIs match if their schemes are equal and, depending on the
     * type of this URI, the opaque part, or the userinfo, host (ignoring
     * case), port and path, or additionally the query and fragment match.
     * Parts which differ only in how they are escaped are considered equal.
     *
     * <P>When comparing against a URL, an explicit port in the URL matches
     * an unset port in this URI only if it is the default port of the
     * scheme.
     *
     * @param other the object to compare with
     * @return true if <var>other</var> is either a URI or URL and it
     *         matches the current URI
     */
    public boolean equals(Object other)
    {
	if (other instanceof URI)
	{
	    URI o = (URI) other;
	    return (scheme.equals(o.scheme)  &&
		    (
		     type == OPAQUE  &&  areEqual(opaque, o.opaque)  ||

		     type == SEMI_GENERIC  &&
		      areEqual(userinfo, o.userinfo)  &&
		      areEqualIC(host, o.host)  &&
		      port == o.port  &&
		      areEqual(path, o.path)  ||

		     type == GENERIC  &&
		      areEqual(userinfo, o.userinfo)  &&
		      areEqualIC(host, o.host)  &&
		      port == o.port  &&
		      pathsEqual(path, o.path)  &&
		      areEqual(query, o.query)  &&
		      areEqual(fragment, o.fragment)
		    ));
	}

	if (other instanceof URL)
	{
	    URL o = (URL) other;
	    String h, f;

	    if (userinfo != null)
		h = userinfo + "@" + host;
	    else
		h = host;

	    f = getPathAndQuery();

	    // The ports match if they are identical, or if this URI uses the
	    // default port (stored as -1) and the URL names that default
	    // port explicitly. A non-default port of ours must never match
	    // the URL's default port.
	    boolean samePort =
		(port == o.getPort()  ||
		 port == -1  &&  o.getPort() == defaultPort(scheme));

	    return (scheme.equalsIgnoreCase(o.getProtocol())  &&
		    (type == OPAQUE  &&  opaque.equals(o.getFile())  ||

		     type == SEMI_GENERIC  &&
		       areEqualIC(h, o.getHost())  &&
		       samePort  &&
		       areEqual(f, o.getFile())  ||

		     type == GENERIC  &&
		       areEqualIC(h, o.getHost())  &&
		       samePort  &&
		       pathsEqual(f, o.getFile())  &&
		       areEqual(fragment, o.getRef())
		    )
		   );
	}

	return false;
    }

    /**
     * Null-safe comparison of two URI parts. Two nulls are equal; two
     * non-null strings are equal if they are identical or if their
     * unescapeNoPE() forms are.
     */
    private static final boolean areEqual(String s1, String s2)
    {
	if (s1 == null  ||  s2 == null)
	    return (s1 == null  &&  s2 == null);

	if (s1.equals(s2))
	    return true;

	return unescapeNoPE(s1, null).equals(unescapeNoPE(s2, null));
    }

    /**
     * Like areEqual(), but the comparison ignores case.
     */
    private static final boolean areEqualIC(String s1, String s2)
    {
	if (s1 == null  ||  s2 == null)
	    return (s1 == null  &&  s2 == null);

	if (s1.equalsIgnoreCase(s2))
	    return true;

	return unescapeNoPE(s1, null).equalsIgnoreCase(unescapeNoPE(s2, null));
    }

    /**
     * Compares two paths segment by segment. The paths are split at each
     * '/' and ';'; two segments match if they are identical or if their
     * unescapeNoPE() forms are equal. The separators themselves must be
     * identical and occur at matching places.
     *
     * @param p1 the first path; may be null
     * @param p2 the second path; may be null
     * @return true if both are null or if all segments match
     */
    private static final boolean pathsEqual(String p1, String p2)
    {
	if (p1 == null  &&  p2 == null)
	    return true;
	if (p1 == null  ||  p2 == null)
	    return false;
	if (p1.equals(p2))
	    return true;

	// ok, so it wasn't that simple. Let's split into parts and compare
	// unescaped.
	int pos1 = 0, end1 = p1.length(), pos2 = 0, end2 = p2.length();
	while (pos1 < end1  &&  pos2 < end2)
	{
	    int start1 = pos1, start2 = pos2;

	    // advance to the separator which ends the current segment
	    char ch;
	    while (pos1 < end1  &&  (ch = p1.charAt(pos1)) != '/'  &&  ch != ';')
		pos1++;
	    while (pos2 < end2  &&  (ch = p2.charAt(pos2)) != '/'  &&  ch != ';')
		pos2++;

	    // either both paths end here or both continue with the same
	    // separator
	    if (pos1 == end1  &&  pos2 < end2  ||
		pos2 == end2  &&  pos1 < end1  ||
		pos1 < end1  &&  pos2 < end2  &&  p1.charAt(pos1) != p2.charAt(pos2))
		return false;

	    if ((!p1.regionMatches(start1, p2, start2, pos1-start1)  ||  (pos1-start1) != (pos2-start2))  &&
		!unescapeNoPE(p1.substring(start1, pos1), null).equals(unescapeNoPE(p2.substring(start2, pos2), null)))
		return false;

	    // skip the separator
	    pos1++;
	    pos2++;
	}

	// If the last segments ran up to the end of the paths then the
	// increments above have moved both positions one past the end, so
	// test for "at or beyond the end" instead of equality. A position
	// still short of its end means that path has segments left over.
	return (pos1 >= end1  &&  pos2 >= end2);
    }

    /**
     * Cached hash code; -1 means it has not been computed yet. A URI whose
     * computed hash happens to be -1 is simply recomputed on every call.
     */
    private int hashCode = -1;

    /**
     * The hash code is calculated over the scheme plus, for opaque URIs,
     * the opaque part, and for all other URIs the host (ignoring case),
     * path, and query. All parts are passed through unescapeNoPE() first.
     * The result is cached in the hashCode field.
     *
     * @return the hash code
     */
    public int hashCode()
    {
	if (hashCode == -1)
	{
	    int h = (scheme != null ? unescapeNoPE(scheme, null).hashCode() : 0);

	    if (type == OPAQUE)
		h += (opaque != null ? unescapeNoPE(opaque, null).hashCode() : 0) * 7;
	    else
	    {
		// Lower-case with a fixed locale: the default locale may map
		// characters differently (e.g. 'I' under a Turkish locale),
		// which would give hosts that compare equal ignoring case
		// different hash codes.
		h += (host != null ? unescapeNoPE(host, null).toLowerCase(java.util.Locale.ENGLISH).hashCode() : 0) * 7 +
		     (path != null ? unescapeNoPE(path, null).hashCode() : 0) * 13 +
		     (query != null ? unescapeNoPE(query, null).hashCode() : 0) * 17;
	    }

	    hashCode = h;
	}

	return hashCode;
    }


    /**
     * Escape any character not in the given character class. This is a
     * convenience wrapper which converts the string to a character array,
     * hands it to escape(char[], BitSet, boolean) and
     * wraps the result in a new string.
     *
     * @param elem         the string to escape
     * @param allowed_char the BitSet of all allowed characters
     * @param utf8         if true, will first UTF-8 encode unallowed characters
     * @return the string with all characters not in allowed_char
     *         escaped
     */
    public static String escape(String elem, BitSet allowed_char, boolean utf8)
    {
	char[] rawChars     = elem.toCharArray();
	char[] escapedChars = escape(rawChars, allowed_char, utf8);
	return new String(escapedChars);
    }

    /**
     * Escape any character not in the given character class. Characters
     * greater 255 are always escaped according to ??? .
     *
     * @param elem         the array of characters to escape
     * @param allowed_char the BitSet of all allowed characters
     * @param utf8         if true, will first UTF-8 encode unallowed characters
     * @return the elem array with all characters not in allowed_char
     *         escaped
     */
    public static char[] escape(char[] elem, BitSet allowed_char, boolean utf8)
    {
	int cnt=0;
	for (int idx=0; idx= 0x0080)
			cnt += 3;
		    if (elem[idx] >= 0x00800)
			cnt += 3;
		    if ((elem[idx] & 0xFC00) == 0xD800  &&  idx+1 < elem.length  &&
			(elem[idx+1] & 0xFC00) == 0xDC00)
		      cnt -= 6;
		}
	    }
	}

	if (cnt == 0)  return elem;

	char[] tmp = new char[elem.length + cnt];
	for (int idx=0, pos=0; idx>  6) & 0x1F));
		    pos = enc(tmp, pos, 0x80 | ((c >>  0) & 0x3F));
		}
		else if (!((c & 0xFC00) == 0xD800  &&  idx+1 < elem.length  &&
			     (elem[idx+1] & 0xFC00) == 0xDC00))
		{
		    pos = enc(tmp, pos, 0xE0 | ((c >> 12) & 0x0F));
		    pos = enc(tmp, pos, 0x80 | ((c >>  6) & 0x3F));
		    pos = enc(tmp, pos, 0x80 | ((c >>  0) & 0x3F));
		}
		else
		{
		    int ch = ((c & 0x03FF) << 10) | (elem[++idx] & 0x03FF);
		    ch += 0x10000;
		    pos = enc(tmp, pos, 0xF0 | ((ch >> 18) & 0x07));
		    pos = enc(tmp, pos, 0x80 | ((ch >> 12) & 0x3F));
		    pos = enc(tmp, pos, 0x80 | ((ch >>  6) & 0x3F));
		    pos = enc(tmp, pos, 0x80 | ((ch >>  0) & 0x3F));
		}
	    }
	    else
		pos = enc(tmp, pos, c);
	}

	return tmp;
    }

    private static final char[] hex =
	    {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};

    private static final int enc(char[] out, int pos, int c)
    {
	out[pos++] = '%';
	out[pos++] = hex[(c >> 4) & 0xf];
	out[pos++] = hex[c & 0xf];
	return pos;
    }

    /**
     * Unescape escaped characters (i.e. %xx) except reserved ones.
     *
     * @param str      the string to unescape
     * @param reserved the characters which may not be unescaped, or null
     * @return the unescaped string
     * @exception ParseException if the two digits following a `%' are
     *            not a valid hex number
     */
    public static final String unescape(String str, BitSet reserved)
	    throws ParseException
    {
	if (str == null  ||  str.indexOf('%') == -1)
	    return str;  				// an optimization

	char[] buf = str.toCharArray();
	char[] res = new char[buf.length];

	char[] utf = new char[4];
	int utf_idx = 0, utf_len = -1;
	int didx = 0;
	for (int sidx=0; sidx buf.length)
			throw new NumberFormatException();
		    ch = Integer.parseInt(str.substring(sidx+1,sidx+3), 16);
		    if (ch < 0)
			throw new NumberFormatException();
		    sidx += 2;
                }
                catch (NumberFormatException e)
                {
		    /* Hmm, people not reading specs again, so we just
		     * ignore it...
                    throw new ParseException(str.substring(sidx,sidx+3) +
                                            " is an invalid code");
		    */
		    ch = buf[sidx];
                }

		// check if we're working on a utf-char
		if (utf_len > 0)
		{
		    if ((ch & 0xC0) != 0x80)	// oops, we misinterpreted
		    {
			didx = copyBuf(utf, utf_idx, ch, res, didx, reserved, false);
			utf_len = -1;
		    }
		    else if (utf_idx == utf_len - 1)	// end-of-char
		    {
			if ((utf[0] & 0xE0) == 0xC0)
			    ch = (utf[0] & 0x1F) <<  6 |
				 (ch & 0x3F);
			else if ((utf[0] & 0xF0) == 0xE0)
			    ch = (utf[0] & 0x0F) << 12 |
				 (utf[1] & 0x3F) <<  6 |
				 (ch & 0x3F);
			else
			    ch = (utf[0] & 0x07) << 18 |
				 (utf[1] & 0x3F) << 12 |
				 (utf[2] & 0x3F) <<  6 |
				 (ch & 0x3F);
			if (reserved != null  &&  reserved.get(ch))
			    didx = copyBuf(utf, utf_idx, ch, res, didx, null, true);
			else if (utf_len < 4)
			    res[didx++] = (char) ch;
			else
			{
			    ch -= 0x10000;
			    res[didx++] = (char) ((ch >> 10)    | 0xD800);
			    res[didx++] = (char) ((ch & 0x03FF) | 0xDC00);
			}
			utf_len = -1;
		    }
		    else				// continue
			utf[utf_idx++] = (char) ch;
		}
		// check if this is the start of a utf-char
		else if ((ch & 0xE0) == 0xC0  ||  (ch & 0xF0) == 0xE0  ||
			 (ch & 0xF8) == 0xF0)
		{
		    if ((ch & 0xE0) == 0xC0)
			utf_len = 2;
		    else if ((ch & 0xF0) == 0xE0)
			utf_len = 3;
		    else
			utf_len = 4;
		    utf[0] = (char) ch;
		    utf_idx = 1;
		}
		// leave reserved alone
		else if (reserved != null  &&  reserved.get(ch))
		{
		    res[didx++] = buf[sidx];
		    sidx -= 2;
		}
		// just use the decoded version
		else
		    res[didx++] = (char) ch;
	    }
	    else if (utf_len > 0)	// oops, we misinterpreted
	    {
		didx = copyBuf(utf, utf_idx, buf[sidx], res, didx, reserved, false);
		utf_len = -1;
	    }
	    else
		res[didx++] = buf[sidx];
	}
	if (utf_len > 0)	// oops, we misinterpreted
	    didx = copyBuf(utf, utf_idx, -1, res, didx, reserved, false);

	return new String(res, 0, didx);
    }

    /*
     * Helper called from unescape() when bytes collected for a UTF-8
     * sequence have to be written to the result buffer after all; the
     * callers assign the returned value to their destination index.
     *
     * If ch is non-negative it is first appended to utf at utf_idx.
     *
     * NOTE(review): the rest of this method is not intact in this copy of
     * the file. The for-loop header below is cut off after "idx", and the
     * lines that follow it look like the tail of an error message from a
     * different (test) method. How the utf buffer is copied, and how
     * reserved and escapeAll are used, cannot be determined from what is
     * left - restore this region from the upstream HTTPClient sources.
     */
    private static final int copyBuf(char[] utf, int utf_idx, int ch,
				     char[] res, int didx, BitSet reserved,
				     boolean escapeAll)
    {
	if (ch >= 0)
	    utf[utf_idx++] = (char) ch;

	for (int idx=0; idx" + nl +
				"  rel-URI  = <" + relURI + ">" + nl+
				"  expected   <" + result + ">" + nl+
				"  but got    <" + new URI(base, relURI) + ">");
	}
    }

    /**
     * Self-test helper: parses both strings as URIs and checks that the
     * two URIs are equal and have the same hash code.
     *
     * @param one the first URI string
     * @param two the second URI string
     * @exception Exception if the URIs are not equal or their hash codes
     *            differ, or if constructing a URI fails
     */
    private static void testEqual(String one, String two)  throws Exception
    {
	final URI lhs = new URI(one);
	final URI rhs = new URI(two);

	final boolean same = lhs.equals(rhs);
	if (!same)
	    throw new Exception("Test failed: " + nl +
				"  <" + one + "> != <" + two + ">");

	final int lhsHash = lhs.hashCode();
	final int rhsHash = rhs.hashCode();
	if (lhsHash != rhsHash)
	    throw new Exception("Test failed: " + nl +
				"  hashCode <" + one + "> != hashCode <" + two + ">");
    }

    /**
     * Self-test helper: parses both strings as URIs and checks that the
     * two URIs are not equal.
     *
     * @param one the first URI string
     * @param two the second URI string
     * @exception Exception if the URIs are equal, or if constructing a
     *            URI fails
     */
    private static void testNotEqual(String one, String two)  throws Exception
    {
	final URI lhs = new URI(one);
	final URI rhs = new URI(two);

	final boolean same = lhs.equals(rhs);
	if (same)
	    throw new Exception("Test failed: " + nl +
				"  <" + one + "> == <" + two + ">");
    }

    /**
     * Self-test helper: checks that resolving the given string against
     * the base URI is rejected with a ParseException.
     *
     * @param base the base URI
     * @param uri  the URI string which is expected to be invalid
     * @exception Exception if no ParseException was thrown
     */
    private static void testPE(URI base, String uri)  throws Exception
    {
	try
	{
	    new URI(base, uri);
	}
	catch (ParseException expected)
	{
	    return;		// rejected, as required
	}

	throw new Exception("Test failed: " + nl +
			    "  <" + uri + "> should be invalid");
    }

    /**
     * Self-test helper: escapes the raw string with UTF-8 encoding
     * enabled, using uricChar as the set of allowed characters, and
     * checks the result against the expected string.
     *
     * @param raw     the string to escape
     * @param escaped the expected result
     * @exception Exception if the result differs from the expected one
     */
    private static void testEscape(String raw, String escaped)  throws Exception
    {
	final char[] rawChars = raw.toCharArray();
	final String actual   = new String(escape(rawChars, uricChar, true));

	if (actual.equals(escaped))
	    return;

	throw new Exception("Test failed: " + nl +
			    "  raw-string: " + raw + nl +
			    "  escaped:    " + actual + nl +
			    "  expected:   " + escaped);
    }

    /**
     * Self-test helper: unescapes the given string with no reserved
     * characters and checks the result against the expected string.
     *
     * @param escaped the string to unescape
     * @param raw     the expected result
     * @exception Exception if the result differs from the expected one
     */
    private static void testUnescape(String escaped, String raw)
	throws Exception
    {
	final String actual = unescape(escaped, null);

	if (actual.equals(raw))
	    return;

	throw new Exception("Test failed: " + nl +
			    "  escaped-string: " + escaped + nl +
			    "  unescaped:      " + actual + nl +
			    "  expected:       " + raw);
    }
}