All downloads are free. Search and download functionality uses the official Maven repository.

com.cloudhopper.commons.util.URLParser Maven / Gradle / Ivy

package com.cloudhopper.commons.util;

/*
 * #%L
 * ch-commons-util
 * %%
 * Copyright (C) 2012 Cloudhopper by Twitter
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.net.MalformedURLException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudhopper.commons.util.codec.URLCodec;
import java.io.IOException;

/**
 * A URL parser for the following pattern:
 * 
 * protocol://[username[:password]@][host[:port]][/path][?query]
 *
 * Examples:
 *   p:///
 *   p://h
 *   p://h:p
 *   p://u@h:p
 *   p://u:p@h:p
 *   p://h/path/to/something
 *   p://h/path?key=value
 *
 * The authority section (user-info, host and port) ends at the first '/' or
 * '?' following "://", or at the end of the string. An '@' that appears in
 * the path or query is therefore never treated as a user-info delimiter;
 * a '/', '?' or '@' inside a username or password must be percent-encoded.
 *
 * @author joelauer (twitter: @jjlauer or http://twitter.com/jjlauer)
 */
public class URLParser {
    private static final Logger logger = LoggerFactory.getLogger(URLParser.class);

    /** Separator between the protocol and the rest of the URL. */
    private static final String PROTOCOL_SEPARATOR = "://";

    private URLParser() {
        // static methods only
    }

    /**
     * URL-decodes a single component by delegating to {@link URLCodec}.
     *
     * @param str0 the encoded component; must not be null
     * @return the decoded component
     * @throws MalformedURLException if the codec reports an IOException; the
     *      original exception is attached as the cause
     */
    static private String decode(String str0) throws MalformedURLException {
        StringBuilder buf = new StringBuilder(str0.length());
        try {
            URLCodec.decode(str0, buf);
        } catch (IOException e) {
            // MalformedURLException has no (message, cause) constructor, so
            // the cause is attached explicitly to keep the root failure visible
            MalformedURLException ex = new MalformedURLException(
                    "Invalid URL: failed while URL decoding '" + str0 + "'");
            ex.initCause(e);
            throw ex;
        }
        return buf.toString();
    }

    /**
     * Parses a URL string into its protocol, username, password, host, port,
     * path and query parts. Username, password and path are URL-decoded; the
     * protocol, host and query are stored exactly as they appear. Parts that
     * are absent or empty are not set on the returned object (a host
     * followed by ':' is set even when the host text itself is empty).
     *
     * @param url the URL to parse; must not be null
     * @return a new URL object populated with every part found
     * @throws MalformedURLException if the string contains no "://", if the
     *      port is not an integer, or if URL decoding of a part fails
     */
    static public URL parse(String url) throws MalformedURLException {
        //
        // parse protocol
        //
        int protocolEnd = url.indexOf(PROTOCOL_SEPARATOR);
        if (protocolEnd < 0) {
            throw new MalformedURLException("Invalid URL [" + url + "]: no protocol specified");
        }

        // the url we'll be returning
        URL r = new URL();
        r.setProtocol(url.substring(0, protocolEnd));

        // the authority runs from just after "://" to the first '/' or '?'
        int authorityStart = protocolEnd + PROTOCOL_SEPARATOR.length();
        int authorityEnd = findAuthorityEnd(url, authorityStart);

        //
        // username[:password]
        //
        int hostStart = authorityStart;
        int atPos = url.indexOf('@', authorityStart);
        // only an '@' inside the authority delimits user-info; one found in
        // the path or query belongs to that part and must be left alone
        if (atPos >= 0 && atPos < authorityEnd) {
            parseUserInfo(r, url.substring(authorityStart, atPos));
            hostStart = atPos + 1;
        }

        //
        // host[:port]
        //
        parseHostPort(r, url, url.substring(hostStart, authorityEnd));

        // we may be done
        int pos = authorityEnd;
        if (pos >= url.length()) {
            return r;
        }

        //
        // path: the character at pos is either '/' or '?'
        //
        if (url.charAt(pos) == '/') {
            // search from pos so a '?' earlier in the URL can never produce
            // an end index that lies before the start of the path
            int queryPos = url.indexOf('?', pos);
            int pathEnd = (queryPos < 0) ? url.length() : queryPos;
            r.setPath(decode(url.substring(pos, pathEnd)));
            pos = pathEnd;
        }

        //
        // query: if anything remains, pos points at the '?'
        //
        if (pos < url.length()) {
            String query = url.substring(pos + 1);
            if (query.length() > 0) {
                r.setQuery(query);
            }
        }

        return r;
    }

    /**
     * Finds where the authority section ends: the index of the first '/' or
     * '?' at or after the given position, or the length of the string when
     * neither character occurs.
     */
    private static int findAuthorityEnd(String url, int from) {
        int slashPos = url.indexOf('/', from);
        int queryPos = url.indexOf('?', from);
        if (slashPos < 0 && queryPos < 0) {
            return url.length();
        }
        if (slashPos < 0) {
            return queryPos;
        }
        if (queryPos < 0) {
            return slashPos;
        }
        return Math.min(slashPos, queryPos);
    }

    /**
     * Splits "username[:password]" at the first ':' and stores each
     * non-empty part, URL-decoded, on the given URL.
     */
    private static void parseUserInfo(URL r, String userPass) throws MalformedURLException {
        String username = userPass;
        String password = "";
        int colPos = userPass.indexOf(':');
        if (colPos >= 0) {
            username = userPass.substring(0, colPos);
            password = userPass.substring(colPos + 1);
        }
        if (username.length() > 0) {
            r.setUsername(decode(username));
        }
        if (password.length() > 0) {
            r.setPassword(decode(password));
        }
    }

    /**
     * Splits "host[:port]" at the first ':' and stores the host and, when
     * present, the integer port on the given URL. An empty string sets
     * nothing. The full url is only used to build the error message.
     */
    private static void parseHostPort(URL r, String url, String hostPort) throws MalformedURLException {
        if (hostPort.length() == 0) {
            // no host in this url (e.g. "p:///")
            return;
        }

        int colPos = hostPort.indexOf(':');
        if (colPos < 0) {
            // entire string is the host
            r.setHost(hostPort);
            return;
        }

        r.setHost(hostPort.substring(0, colPos));

        String tempPort = hostPort.substring(colPos + 1);
        try {
            Integer port = Integer.valueOf(tempPort);
            r.setPort(port);
        } catch (NumberFormatException e) {
            throw new MalformedURLException("Invalid URL [" + url + "]: port '" + tempPort + "' was not an integer");
        }
    }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy