// com.cloudhopper.commons.util.URLParser Maven / Gradle / Ivy
package com.cloudhopper.commons.util;
/*
* #%L
* ch-commons-util
* %%
* Copyright (C) 2012 Cloudhopper by Twitter
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.net.MalformedURLException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudhopper.commons.util.codec.URLCodec;
import java.io.IOException;
/**
 * A URL parser for the following pattern:
 *
 *   protocol://[username[:password]@][host[:port]][/path][?query]
 *
 * Examples:
 *   p:///
 *   p://h
 *   p://h:p
 *   p://u@h:p
 *   p://u:p@h:p
 *   p://h/path/to/something
 *   p://h/path?key=value
 *
 * The username, password and path are URL-decoded with {@link URLCodec};
 * the query string is stored exactly as it appears in the URL.
 *
 * @author joelauer (twitter: @jjlauer or http://twitter.com/jjlauer)
 */
public class URLParser {
    private static final Logger logger = LoggerFactory.getLogger(URLParser.class);

    private URLParser() {
        // static methods only
    }

    /**
     * Builds a MalformedURLException that keeps the underlying cause.
     * MalformedURLException has no (message, cause) constructor, so the cause
     * is attached with initCause.
     */
    static private MalformedURLException malformed(String message, Throwable cause) {
        MalformedURLException e = new MalformedURLException(message);
        e.initCause(cause);
        return e;
    }

    /**
     * URL-decodes a single component of the URL.
     *
     * @param str0 The encoded component
     * @return The decoded component
     * @throws MalformedURLException If the codec fails to decode the value
     */
    static private String decode(String str0) throws MalformedURLException {
        StringBuilder buf = new StringBuilder(str0.length());
        try {
            URLCodec.decode(str0, buf);
        } catch (IOException e) {
            throw malformed("Invalid URL: failed while URL decoding '" + str0 + "'", e);
        }
        return buf.toString();
    }

    /**
     * Parses a string into a URL. Only the components actually present in the
     * string are set on the returned object; empty components (for example an
     * empty username or an empty query) are not set.
     *
     * The authority section (username, password, host, port) ends at the
     * first '/' or '?' following "://". A literal '/' or '?' inside the
     * username or password must therefore be percent-encoded.
     *
     * @param url The string to parse; must not be null (a null value results
     *      in a NullPointerException)
     * @return The parsed URL
     * @throws MalformedURLException If no "://" is present, the port is not
     *      an integer, or a component cannot be URL-decoded
     */
    static public URL parse(String url) throws MalformedURLException {
        //
        // parse protocol
        //
        int i = url.indexOf("://");
        if (i < 0) {
            throw new MalformedURLException("Invalid URL [" + url + "]: no protocol specified");
        }

        // the url we'll be returning
        URL r = new URL();
        r.setProtocol(url.substring(0, i));

        // skip :// part
        int pos = i + 3;

        //
        // locate the end of the authority section: whichever of '/' or '?'
        // comes first, or the end of the string if neither is present
        //
        int authorityEnd = url.length();
        int slashPos = url.indexOf('/', pos);
        if (slashPos >= 0) {
            authorityEnd = slashPos;
        }
        int questionPos = url.indexOf('?', pos);
        if (questionPos >= 0 && questionPos < authorityEnd) {
            authorityEnd = questionPos;
        }

        //
        // username[:password]
        //
        // only an '@' inside the authority separates the user info from the
        // host; an '@' in the path or query is ordinary data
        int atPos = url.indexOf('@', pos);
        if (atPos >= 0 && atPos < authorityEnd) {
            String userPass = url.substring(pos, atPos);
            int colonPos = userPass.indexOf(':');
            String username = (colonPos >= 0 ? userPass.substring(0, colonPos) : userPass);
            String password = (colonPos >= 0 ? userPass.substring(colonPos + 1) : "");
            if (username.length() > 0) {
                r.setUsername(decode(username));
            }
            if (password.length() > 0) {
                r.setPassword(decode(password));
            }
            // update the position for the next parsing section
            pos = atPos + 1;
        }

        //
        // host[:port]
        //
        String hostPort = url.substring(pos, authorityEnd);
        if (hostPort.length() > 0) {
            int colPos = hostPort.indexOf(':');
            if (colPos >= 0) {
                r.setHost(hostPort.substring(0, colPos));
                String tempPort = hostPort.substring(colPos + 1);
                try {
                    Integer port = Integer.valueOf(tempPort);
                    r.setPort(port);
                } catch (NumberFormatException e) {
                    throw malformed("Invalid URL [" + url + "]: port '" + tempPort + "' was not an integer", e);
                }
            } else {
                // entire string is the host
                r.setHost(hostPort);
            }
        }

        // next section starts where the authority ended
        pos = authorityEnd;

        // we may be done
        if (pos >= url.length()) {
            return r;
        }

        //
        // path: the next character is either '/' or '?'
        //
        if (url.charAt(pos) == '/') {
            // read till the '?' that follows the path, or till end of string
            int pathEnd = url.indexOf('?', pos);
            if (pathEnd < 0) {
                pathEnd = url.length();
            }
            r.setPath(decode(url.substring(pos, pathEnd)));
            pos = pathEnd;
        }

        // we may be done
        if (pos >= url.length()) {
            return r;
        }

        //
        // query string (everything after the '?', stored without decoding)
        //
        if (url.charAt(pos) == '?') {
            String query = url.substring(pos + 1);
            if (query.length() > 0) {
                r.setQuery(query);
            }
        }

        return r;
    }
}