jodd.util.URLCoder Maven / Gradle / Ivy
// Copyright (c) 2003-present, Jodd Team (http://jodd.org)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package jodd.util;
import jodd.core.JoddCore;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static jodd.util.CharUtil.isAlpha;
import static jodd.util.CharUtil.isDigit;
import static jodd.util.CharUtil.isPchar;
import static jodd.util.CharUtil.isSubDelimiter;
import static jodd.util.CharUtil.isUnreserved;
/**
* Encodes URLs correctly, significantly faster and more convenient.
*
* Here is an example of full URL:
* {@literal https://jodd:[email protected]:8080/file;p=1?q=2#third}.
* It consist of:
*
* - scheme (https)
* - user (jodd)
* - password (ddoj)
* - host (www.jodd.org)
* - port (8080)
* - path (file)
* - path parameter (p=1)
* - query parameter (q=2)
* - fragment (third)
*
* Each URL part has its own encoding rules. The only correct way of
* encoding URLs is to encode each part separately, and then to concatenate
* results. For easier query building you can use {@link #build(String) builder}.
* It provides fluent interface for defining query parameters.
*/
public class URLCoder {
private static final String SCHEME_PATTERN = "([^:/?#]+):";
private static final String HTTP_PATTERN = "(http|https):";
private static final String USERINFO_PATTERN = "([^@/]*)";
private static final String HOST_PATTERN = "([^/?#:]*)";
private static final String PORT_PATTERN = "(\\d*)";
private static final String PATH_PATTERN = "([^?#]*)";
private static final String QUERY_PATTERN = "([^#]*)";
private static final String LAST_PATTERN = "(.*)";
// Regex patterns that matches URIs. See RFC 3986, appendix B
private static final Pattern URI_PATTERN = Pattern.compile(
"^(" + SCHEME_PATTERN + ")?" + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN +
")?" + ")?" + PATH_PATTERN + "(\\?" + QUERY_PATTERN + ")?" + "(#" + LAST_PATTERN + ")?");
private static final Pattern HTTP_URL_PATTERN = Pattern.compile(
'^' + HTTP_PATTERN + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN + ")?" + ")?" +
PATH_PATTERN + "(\\?" + LAST_PATTERN + ")?");
/**
* Enumeration to identify the parts of a URI.
*
* Contains methods to indicate whether a given character is valid in a specific URI component.
*
* @see RFC 3986
*/
enum URIPart {
SCHEME {
@Override
public boolean isValid(char c) {
return isAlpha(c) || isDigit(c) || c == '+' || c == '-' || c == '.';
}
},
// AUTHORITY {
// @Override
// public boolean isValid(char c) {
// return isUnreserved(c) || isSubDelimiter(c) || c == ':' || c == '@';
// }
// },
USER_INFO {
@Override
public boolean isValid(char c) {
return isUnreserved(c) || isSubDelimiter(c) || c == ':';
}
},
HOST {
@Override
public boolean isValid(char c) {
return isUnreserved(c) || isSubDelimiter(c);
}
},
PORT {
@Override
public boolean isValid(char c) {
return isDigit(c);
}
},
PATH {
@Override
public boolean isValid(char c) {
return isPchar(c) || c == '/';
}
},
PATH_SEGMENT {
@Override
public boolean isValid(char c) {
return isPchar(c);
}
},
QUERY {
@Override
public boolean isValid(char c) {
return isPchar(c) || c == '/' || c == '?';
}
},
QUERY_PARAM {
@Override
public boolean isValid(char c) {
if (c == '=' || c == '+' || c == '&' || c == ';') {
return false;
}
return isPchar(c) || c == '/' || c == '?';
}
},
FRAGMENT {
@Override
public boolean isValid(char c) {
return isPchar(c) || c == '/' || c == '?';
}
};
/**
* Indicates whether the given character is allowed in this URI component.
*
* @return true
if the character is allowed; {@code false} otherwise
*/
public abstract boolean isValid(char c);
}
// ---------------------------------------------------------------- util methods
/**
* Encodes single URI component.
*/
private static String encodeUriComponent(String source, String encoding, URIPart uriPart) {
if (source == null) {
return null;
}
byte[] bytes;
try {
bytes = encodeBytes(source.getBytes(encoding), uriPart);
} catch (UnsupportedEncodingException ignore) {
return null;
}
char[] chars = new char[bytes.length];
for (int i = 0; i < bytes.length; i++) {
chars[i] = (char) bytes[i];
}
return new String(chars);
}
/**
* Encodes byte array using allowed characters from {@link URIPart}.
*/
private static byte[] encodeBytes(byte[] source, URIPart uriPart) {
ByteArrayOutputStream bos = new ByteArrayOutputStream(source.length);
for (byte b : source) {
if (b < 0) {
b += 256;
}
if (uriPart.isValid((char) b)) {
bos.write(b);
} else {
bos.write('%');
char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
bos.write(hex1);
bos.write(hex2);
}
}
return bos.toByteArray();
}
// ---------------------------------------------------------------- main methods
/**
* Encodes the given URI scheme with the given encoding.
*/
public static String encodeScheme(String scheme, String encoding) {
return encodeUriComponent(scheme, encoding, URIPart.SCHEME);
}
public static String encodeScheme(String scheme) {
return encodeUriComponent(scheme, JoddCore.encoding, URIPart.SCHEME);
}
/* /**
* Encodes the given URI authority with the given encoding.
*
public static String encodeAuthority(String authority, String encoding) {
return encodeUriComponent(authority, encoding, URIPart.AUTHORITY);
}
public static String encodeAuthority(String authority) {
return encodeUriComponent(authority, JoddCore.encoding, URIPart.AUTHORITY);
}
*/
/**
* Encodes the given URI user info with the given encoding.
*/
public static String encodeUserInfo(String userInfo, String encoding) {
return encodeUriComponent(userInfo, encoding, URIPart.USER_INFO);
}
public static String encodeUserInfo(String userInfo) {
return encodeUriComponent(userInfo, JoddCore.encoding, URIPart.USER_INFO);
}
/**
* Encodes the given URI host with the given encoding.
*/
public static String encodeHost(String host, String encoding) {
return encodeUriComponent(host, encoding, URIPart.HOST);
}
public static String encodeHost(String host) {
return encodeUriComponent(host, JoddCore.encoding, URIPart.HOST);
}
/**
* Encodes the given URI port with the given encoding.
*/
public static String encodePort(String port, String encoding) {
return encodeUriComponent(port, encoding, URIPart.PORT);
}
public static String encodePort(String port) {
return encodeUriComponent(port, JoddCore.encoding, URIPart.PORT);
}
/**
* Encodes the given URI path with the given encoding.
*/
public static String encodePath(String path, String encoding) {
return encodeUriComponent(path, encoding, URIPart.PATH);
}
public static String encodePath(String path) {
return encodeUriComponent(path, JoddCore.encoding, URIPart.PATH);
}
/**
* Encodes the given URI path segment with the given encoding.
*/
public static String encodePathSegment(String segment, String encoding) {
return encodeUriComponent(segment, encoding, URIPart.PATH_SEGMENT);
}
public static String encodePathSegment(String segment) {
return encodeUriComponent(segment, JoddCore.encoding, URIPart.PATH_SEGMENT);
}
/**
* Encodes the given URI query with the given encoding.
*/
public static String encodeQuery(String query, String encoding) {
return encodeUriComponent(query, encoding, URIPart.QUERY);
}
public static String encodeQuery(String query) {
return encodeUriComponent(query, JoddCore.encoding, URIPart.QUERY);
}
/**
* Encodes the given URI query parameter with the given encoding.
*/
public static String encodeQueryParam(String queryParam, String encoding) {
return encodeUriComponent(queryParam, encoding, URIPart.QUERY_PARAM);
}
public static String encodeQueryParam(String queryParam) {
return encodeUriComponent(queryParam, JoddCore.encoding, URIPart.QUERY_PARAM);
}
/**
* Encodes the given URI fragment with the given encoding.
*/
public static String encodeFragment(String fragment, String encoding) {
return encodeUriComponent(fragment, encoding, URIPart.FRAGMENT);
}
public static String encodeFragment(String fragment) {
return encodeUriComponent(fragment, JoddCore.encoding, URIPart.FRAGMENT);
}
// ---------------------------------------------------------------- url
/**
* @see #encodeUri(String, String)
*/
public static String encodeUri(String uri) {
return encodeUri(uri, JoddCore.encoding);
}
/**
* Encodes the given source URI into an encoded String. All various URI components are
* encoded according to their respective valid character sets.
*
This method does not attempt to encode "=" and "{@literal &}"
* characters in query parameter names and query parameter values because they cannot
* be parsed in a reliable way.
*/
public static String encodeUri(String uri, String encoding) {
Matcher m = URI_PATTERN.matcher(uri);
if (m.matches()) {
String scheme = m.group(2);
String authority = m.group(3);
String userinfo = m.group(5);
String host = m.group(6);
String port = m.group(8);
String path = m.group(9);
String query = m.group(11);
String fragment = m.group(13);
return encodeUriComponents(scheme, authority, userinfo, host, port, path, query, fragment, encoding);
}
throw new IllegalArgumentException("Invalid URI: " + uri);
}
/**
* @see #encodeHttpUrl(String, String)
*/
public static String encodeHttpUrl(String httpUrl) {
return encodeHttpUrl(httpUrl, JoddCore.encoding);
}
/**
* Encodes the given HTTP URI into an encoded String. All various URI components are
* encoded according to their respective valid character sets.
*
This method does not support fragments ({@code #}),
* as these are not supposed to be sent to the server, but retained by the client.
*
This method does not attempt to encode "=" and "{@literal &}"
* characters in query parameter names and query parameter values because they cannot
* be parsed in a reliable way.
*/
public static String encodeHttpUrl(String httpUrl, String encoding) {
Matcher m = HTTP_URL_PATTERN.matcher(httpUrl);
if (m.matches()) {
String scheme = m.group(1);
String authority = m.group(2);
String userinfo = m.group(4);
String host = m.group(5);
String portString = m.group(7);
String path = m.group(8);
String query = m.group(10);
return encodeUriComponents(scheme, authority, userinfo, host, portString, path, query, null, encoding);
}
throw new IllegalArgumentException("Invalid HTTP URL: " + httpUrl);
}
private static String encodeUriComponents(
String scheme, String authority, String userInfo,
String host, String port, String path, String query,
String fragment, String encoding) {
StringBuilder sb = new StringBuilder();
if (scheme != null) {
sb.append(encodeScheme(scheme, encoding));
sb.append(':');
}
if (authority != null) {
sb.append("//");
if (userInfo != null) {
sb.append(encodeUserInfo(userInfo, encoding));
sb.append('@');
}
if (host != null) {
sb.append(encodeHost(host, encoding));
}
if (port != null) {
sb.append(':');
sb.append(encodePort(port, encoding));
}
}
sb.append(encodePath(path, encoding));
if (query != null) {
sb.append('?');
sb.append(encodeQuery(query, encoding));
}
if (fragment != null) {
sb.append('#');
sb.append(encodeFragment(fragment, encoding));
}
return sb.toString();
}
// ---------------------------------------------------------------- builder
/**
* Creates URL builder for user-friendly way of building URLs.
* Provided path is parsed and {@link #encodeUri(String) encoded}.
* @see #build(String, boolean)
*/
public static Builder build(String path) {
return build(path, true);
}
/**
* Creates URL builder with given path that can be optionally encoded.
* Since most of the time path is valid and does not require to be encoded,
* use this method to gain some performance. When encoding flag is turned off,
* provided path is used without processing.
*
* The purpose of builder is to help with query parameters. All other URI parts
* should be set previously or after the URL is built.
*/
public static Builder build(String path, boolean encodePath) {
return new Builder(path, encodePath, JoddCore.encoding);
}
public static class Builder {
protected final StringBuilder url;
protected final String encoding;
protected boolean hasParams;
public Builder(String path, boolean encodePath, String encoding) {
this.encoding = encoding;
url = new StringBuilder();
if (encodePath) {
url.append(encodeUri(path, encoding));
} else {
url.append(path);
}
this.hasParams = url.indexOf(StringPool.QUESTION_MARK) != -1;
}
/**
* Appends new query parameter to the url.
*/
public Builder queryParam(String name, String value) {
url.append(hasParams ? '&' : '?');
hasParams = true;
url.append(encodeQueryParam(name, encoding));
if ((value != null) && (value.length() > 0)) {
url.append('=');
url.append(encodeQueryParam(value, encoding));
}
return this;
}
/**
* Returns full URL.
*/
@Override
public String toString() {
return url.toString();
}
}
}