// Source listing: org.apache.tomcat.util.net.URL
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.apache.tomcat.util.net;


import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.Locale;


/**
 * URL is designed to provide public APIs for parsing
 * and synthesizing Uniform Resource Locators as similar as possible to the
 * APIs of java.net.URL, but without the ability to open a
 * stream or connection.  One of the consequences of this is that you can
 * construct URLs for protocols for which a URLStreamHandler is not
 * available (such as an "https" URL when JSSE is not installed).
 *
 * <strong>WARNING</strong> - This class assumes that the string
 * representation of a URL conforms to the <code>spec</code> argument
 * as described in RFC 2396 "Uniform Resource Identifiers: Generic Syntax":
 * <pre>
 *   &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;#&lt;fragment&gt;
 * </pre>
 *
 * <strong>FIXME</strong> - This class really ought to end up in a Commons
 * package someplace.
 *
 * @author Craig R. McClanahan
 * @version $Id: URL.java 943260 2010-05-11 20:05:15Z markt $
 */

public final class URL implements Serializable {


    // ----------------------------------------------------------- Constructors


    /**
     * Create a URL object from the specified String representation.
     *
     * @param spec String representation of the URL
     *
     * @exception MalformedURLException if the string representation
     *  cannot be parsed successfully
     */
    public URL(String spec) throws MalformedURLException {

        this(null, spec);

    }


    /**
     * Create a URL object by parsing a string representation relative
     * to a specified context.  Based on logic from JDK 1.3.1's
     * java.net.URL.
     *
     * @param context URL against which the relative representation
     *  is resolved
     * @param spec String representation of the URL (usually relative)
     *
     * @exception MalformedURLException if the string representation
     *  cannot be parsed successfully
     */
    public URL(URL context, String spec) throws MalformedURLException {

        String original = spec;
        int i, limit, c;
        int start = 0;
        String newProtocol = null;
        boolean aRef = false;

        try {

            // Eliminate leading and trailing whitespace
            limit = spec.length();
            while ((limit > 0) && (spec.charAt(limit - 1) <= ' ')) {
                limit--;
            }
            while ((start < limit) && (spec.charAt(start) <= ' ')) {
                start++;
            }

            // If the string representation starts with "url:", skip it
            if (spec.regionMatches(true, start, "url:", 0, 4)) {
                start += 4;
            }

            // Is this a ref relative to the context URL?
            if ((start < spec.length()) && (spec.charAt(start) == '#')) {
                aRef = true;
            }

            // Parse out the new protocol
            for (i = start; !aRef && (i < limit) ; i++) { 
                c = spec.charAt(i);
                if (c == ':') {
                    String s = spec.substring(start, i).toLowerCase(Locale.ENGLISH);
                    // Assume all protocols are valid
                    newProtocol = s;
                    start = i + 1;
                    break;
                } else if( c == '#' ) {
                    aRef = true;
                } else if( !isSchemeChar((char)c) ) {
                    break;
                }
            }

            // Only use our context if the protocols match
            protocol = newProtocol;
            if ((context != null) && ((newProtocol == null) ||
                 newProtocol.equalsIgnoreCase(context.getProtocol()))) {
                // If the context is a hierarchical URL scheme and the spec
                // contains a matching scheme then maintain backwards
                // compatibility and treat it as if the spec didn't contain
                // the scheme; see 5.2.3 of RFC2396
                if ((context.getPath() != null) &&
                    (context.getPath().startsWith("/")))
                    newProtocol = null;
                if (newProtocol == null) {
                    protocol = context.getProtocol();
                    authority = context.getAuthority();
                    userInfo = context.getUserInfo();
                    host = context.getHost();
                    port = context.getPort();
                    file = context.getFile();
                    int question = file.lastIndexOf("?");
                    if (question < 0)
                        path = file;
                    else
                        path = file.substring(0, question);
                }
            }

            if (protocol == null)
                throw new MalformedURLException("no protocol: " + original);

            // Parse out any ref portion of the spec
            i = spec.indexOf('#', start);
            if (i >= 0) {
                ref = spec.substring(i + 1, limit);
                limit = i;
            }

            // Parse the remainder of the spec in a protocol-specific fashion
            parse(spec, start, limit);
            if (context != null)
                normalize();


        } catch (MalformedURLException e) {
            throw e;
        } catch (Exception e) {
            throw new MalformedURLException(e.toString());
        }

    }





    /**
     * Create a URL object from the specified components.  The default port
     * number for the specified protocol will be used.
     *
     * @param protocol Name of the protocol to use
     * @param host Name of the host addressed by this protocol
     * @param file Filename on the specified host
     *
     * @exception MalformedURLException is never thrown, but present for
     *  compatible APIs
     */
    public URL(String protocol, String host, String file)
        throws MalformedURLException {

        this(protocol, host, -1, file);

    }


    /**
     * Create a URL object from the specified components.  Specifying a port
     * number of -1 indicates that the URL should use the default port for
     * that protocol.  Based on logic from JDK 1.3.1's
     * java.net.URL.
     *
     * @param protocol Name of the protocol to use
     * @param host Name of the host addressed by this protocol
     * @param port Port number, or -1 for the default port for this protocol
     * @param file Filename on the specified host
     *
     * @exception MalformedURLException is never thrown, but present for
     *  compatible APIs
     */
    public URL(String protocol, String host, int port, String file)
        throws MalformedURLException {

        this.protocol = protocol;
        this.host = host;
        this.port = port;

        int hash = file.indexOf('#');
        this.file = hash < 0 ? file : file.substring(0, hash);
        this.ref = hash < 0 ? null : file.substring(hash + 1);
        int question = file.lastIndexOf('?');
        if (question >= 0) {
            query = file.substring(question + 1);
            path = file.substring(0, question);
        } else
            path = file;

        if ((host != null) && (host.length() > 0))
            authority = (port == -1) ? host : host + ":" + port;

    }


    // ----------------------------------------------------- Instance Variables


    /**
     * The authority part of the URL.
     */
    private String authority = null;


    /**
     * The filename part of the URL.
     */
    private String file = null;


    /**
     * The host name part of the URL.
     */
    private String host = null;


    /**
     * The path part of the URL.
     */
    private String path = null;


    /**
     * The port number part of the URL.
     */
    private int port = -1;


    /**
     * The protocol name part of the URL.
     */
    private String protocol = null;


    /**
     * The query part of the URL.
     */
    private String query = null;


    /**
     * The reference part of the URL.
     */
    private String ref = null;


    /**
     * The user info part of the URL.
     */
    private String userInfo = null;


    // --------------------------------------------------------- Public Methods


    /**
     * Compare two URLs for equality.  The result is true if and
     * only if the argument is not null, and is a URL object
     * that represents the same URL as this object.  Two
     * URLs are equal if they have the same protocol and
     * reference the same host, the same port number on the host,
     * and the same file and anchor on the host.
     *
     * @param obj The URL to compare against
     */
    @Override
    public boolean equals(Object obj) {

        if (obj == null)
            return (false);
        if (!(obj instanceof URL))
            return (false);
        URL other = (URL) obj;
        if (!sameFile(other))
            return (false);
        return (compare(ref, other.getRef()));

    }


    /**
     * Return the authority part of the URL.
     */
    public String getAuthority() {

        return (this.authority);

    }


    /**
     * Return the filename part of the URL.  NOTE - For
     * compatibility with java.net.URL, this value includes
     * the query string if there was one.  For just the path portion,
     * call getPath() instead.
     */
    public String getFile() {

        if (file == null)
            return ("");
        return (this.file);

    }


    /**
     * Return the host name part of the URL.
     */
    public String getHost() {

        return (this.host);

    }


    /**
     * Return the path part of the URL.
     */
    public String getPath() {

        if (this.path == null)
            return ("");
        return (this.path);

    }


    /**
     * Return the port number part of the URL.
     */
    public int getPort() {

        return (this.port);

    }


    /**
     * Return the protocol name part of the URL.
     */
    public String getProtocol() {

        return (this.protocol);

    }


    /**
     * Return the query part of the URL.
     */
    public String getQuery() {

        return (this.query);

    }


    /**
     * Return the reference part of the URL.
     */
    public String getRef() {

        return (this.ref);

    }


    /**
     * Return the user info part of the URL.
     */
    public String getUserInfo() {

        return (this.userInfo);

    }


    /**
     * Normalize the path (and therefore file)
     * portions of this URL.
     * 
     * NOTE - This method is not part of the public API
     * of java.net.URL, but is provided as a value added
     * service of this implementation.
     *
     * @exception MalformedURLException if a normalization error occurs,
     *  such as trying to move about the hierarchical root
     */
    public void normalize() throws MalformedURLException {

        // Special case for null path
        if (path == null) {
            if (query != null)
                file = "?" + query;
            else
                file = "";
            return;
        }

        // Create a place for the normalized path
        String normalized = path;
        if (normalized.equals("/.")) {
            path = "/";
            if (query != null)
                file = path + "?" + query;
            else
                file = path;
            return;
        }

        // Normalize the slashes and add leading slash if necessary
        if (normalized.indexOf('\\') >= 0)
            normalized = normalized.replace('\\', '/');
        if (!normalized.startsWith("/"))
            normalized = "/" + normalized;

        // Resolve occurrences of "//" in the normalized path
        while (true) {
            int index = normalized.indexOf("//");
            if (index < 0)
                break;
            normalized = normalized.substring(0, index) +
                normalized.substring(index + 1);
        }

        // Resolve occurrences of "/./" in the normalized path
        while (true) {
            int index = normalized.indexOf("/./");
            if (index < 0)
                break;
            normalized = normalized.substring(0, index) +
                normalized.substring(index + 2);
        }

        // Resolve occurrences of "/../" in the normalized path
        while (true) {
            int index = normalized.indexOf("/../");
            if (index < 0)
                break;
            if (index == 0)
                throw new MalformedURLException
                    ("Invalid relative URL reference");
            int index2 = normalized.lastIndexOf('/', index - 1);
            normalized = normalized.substring(0, index2) +
                normalized.substring(index + 3);
        }

        // Resolve occurrences of "/." at the end of the normalized path
        if (normalized.endsWith("/."))
            normalized = normalized.substring(0, normalized.length() - 1);

        // Resolve occurrences of "/.." at the end of the normalized path
        if (normalized.endsWith("/..")) {
            int index = normalized.length() - 3;
            int index2 = normalized.lastIndexOf('/', index - 1);
            if (index2 < 0)
                throw new MalformedURLException
                    ("Invalid relative URL reference");
            normalized = normalized.substring(0, index2 + 1);
        }

        // Return the normalized path that we have completed
        path = normalized;
        if (query != null)
            file = path + "?" + query;
        else
            file = path;

    }


    /**
     * Compare two URLs, excluding the "ref" fields.  Returns true
     * if this URL and the other argument both refer
     * to the same resource.  The two URLs might not both contain
     * the same anchor.
     */
    public boolean sameFile(URL other) {

        if (!compare(protocol, other.getProtocol()))
            return (false);
        if (!compare(host, other.getHost()))
            return (false);
        if (port != other.getPort())
            return (false);
        if (!compare(file, other.getFile()))
            return (false);
        return (true);

    }


    /**
     * Return a string representation of this URL.  This follow the rules in
     * RFC 2396, Section 5.2, Step 7.
     */
    public String toExternalForm() {

        StringBuilder sb = new StringBuilder();
        if (protocol != null) {
            sb.append(protocol);
            sb.append(":");
        }
        if (authority != null) {
            sb.append("//");
            sb.append(authority);
        }
        if (path != null)
            sb.append(path);
        if (query != null) {
            sb.append('?');
            sb.append(query);
        }
        if (ref != null) {
            sb.append('#');
            sb.append(ref);
        }
        return (sb.toString());

    }


    /**
     * Return a string representation of this object.
     */
    @Override
    public String toString() {

        StringBuilder sb = new StringBuilder("URL[");
        sb.append("authority=");
        sb.append(authority);
        sb.append(", file=");
        sb.append(file);
        sb.append(", host=");
        sb.append(host);
        sb.append(", port=");
        sb.append(port);
        sb.append(", protocol=");
        sb.append(protocol);
        sb.append(", query=");
        sb.append(query);
        sb.append(", ref=");
        sb.append(ref);
        sb.append(", userInfo=");
        sb.append(userInfo);
        sb.append("]");
        return (sb.toString());

        //        return (toExternalForm());

    }


    // -------------------------------------------------------- Private Methods


    /**
     * Compare to String values for equality, taking appropriate care if one
     * or both of the values are null.
     *
     * @param first First string
     * @param second Second string
     */
    private boolean compare(String first, String second) {

        if (first == null) {
            if (second == null)
                return (true);
            else
                return (false);
        } else {
            if (second == null)
                return (false);
            else
                return (first.equals(second));
        }

    }


    /**
     * Parse the specified portion of the string representation of a URL,
     * assuming that it has a format similar to that for http.
     *
     * 
FIXME - This algorithm can undoubtedly be optimized
     * for performance.  However, that needs to wait until after sufficient
     * unit tests are implemented to guarantee correct behavior with no
     * regressions.
     *
     * @param spec String representation being parsed
     * @param start Starting offset, which will be just after the ':' (if
     *  there is one) that determined the protocol name
     * @param limit Ending position, which will be the position of the '#'
     *  (if there is one) that delimited the anchor
     *
     * @exception MalformedURLException if a parsing error occurs
     */
    private void parse(String spec, int start, int limit)
        throws MalformedURLException {

        // Trim the query string (if any) off the tail end
        int question = spec.lastIndexOf('?', limit - 1);
        if ((question >= 0) && (question < limit)) {
            query = spec.substring(question + 1, limit);
            limit = question;
        } else {
            query = null;
        }

        // Parse the authority section
        if (spec.indexOf("//", start) == start) {
            int pathStart = spec.indexOf("/", start + 2);
            if ((pathStart >= 0) && (pathStart < limit)) {
                authority = spec.substring(start + 2, pathStart);
                start = pathStart;
            } else {
                authority = spec.substring(start + 2, limit);
                start = limit;
            }
            if (authority.length() > 0) {
                int at = authority.indexOf('@');
                if( at >= 0 ) {
                    userInfo = authority.substring(0,at);
                }
                int ipv6 = authority.indexOf('[',at+1);
                int hStart = at+1;
                if( ipv6 >= 0 ) {
                    hStart = ipv6;
                    ipv6 = authority.indexOf(']', ipv6);
                    if( ipv6 < 0 ) {
                        throw new MalformedURLException(
                                                        "Closing ']' not found in IPV6 address: " + authority);
                    } else {
                        at = ipv6-1;
                    }
                }
                                                        
                int colon = authority.indexOf(':', at+1);
                if (colon >= 0) {
                    try {
                        port =
                            Integer.parseInt(authority.substring(colon + 1));
                    } catch (NumberFormatException e) {
                        throw new MalformedURLException(e.toString());
                    }
                    host = authority.substring(hStart, colon);
                } else {
                    host = authority.substring(hStart);
                    port = -1;
                }
            }
        }

        // Parse the path section
        if (spec.indexOf("/", start) == start) {     // Absolute path
            path = spec.substring(start, limit);
            if (query != null)
                file = path + "?" + query;
            else
                file = path;
            return;
        }

        // Resolve relative path against our context's file
        if (path == null) {
            if (query != null)
                file = "?" + query;
            else
                file = null;
            return;
        }
        if (!path.startsWith("/"))
            throw new MalformedURLException
                ("Base path does not start with '/'");
        if (!path.endsWith("/"))
            path += "/../";
        path += spec.substring(start, limit);
        if (query != null)
            file = path + "?" + query;
        else
            file = path;
        return;

    }

    /**
     * Determine if the character is allowed in the scheme of a URI.
     * See RFC 2396, Section 3.1
     */
    public static boolean isSchemeChar(char c) {
        return Character.isLetterOrDigit(c) ||
            c == '+' || c == '-' || c == '.';
    }

}