// net.dongliu.commons.http.UrlCoder (Maven / Gradle / Ivy)
package net.dongliu.commons.http;
import java.io.CharArrayWriter;
import java.nio.charset.Charset;
import java.util.BitSet;
/**
*
* Util methods for encode / decode uri.
*
* The encoding rules differ between a URL path segment, a path parameter, and the query part; this class follows RFC 3986:
*
* For HTTP URLs, a space in a path fragment part has to be encoded to "%20" (not, absolutely not "+"),
* while the "+" character in the path fragment part can be left unencoded.
* Now in the query part, spaces may be encoded to either "+" (just for backwards compatibility) or "%20"
* while the "+" character has to be escaped to "%2B".
*
*
* "?" is allowed unescaped anywhere within a query part,
* "/" is allowed unescaped anywhere within a query part,
* "=" is allowed unescaped anywhere within a path parameter or query parameter value, and within a path segment,
* {@code ":@-._~!$&'()*+,;=" } are allowed unescaped anywhere within a path segment part,
* {@code "/?:@-._~!$&'()*+,;=" } are allowed unescaped anywhere within a fragment part.
*
*
*
* For "https://bob:bobby@www.lunatech.com:8080/file;p=1?q=2#third", we can extract the following information:
*
* Scheme https
* User bob
* Password bobby
* Host address www.lunatech.com
* Port 8080
* Path /file
* Path parameters p=1
* Query parameters q=2
* Fragment third
*
*
* java.net.URLEncoder converts a String to the application/x-www-form-urlencoded MIME format,
* which is not suitable for encoding URLs.
*/
public class UrlCoder {
    /** Characters left unescaped when encoding a query parameter key or value. */
    static final BitSet queryWhiteSet = whiteSetWith("-_.*?/");
    /** Characters left unescaped when encoding a path segment. */
    static final BitSet pathSegmentWhiteSet = whiteSetWith(":@-._~!$&'()*+,;=");
    /** Characters left unescaped when encoding a fragment. */
    static final BitSet fragmentWhiteSet = whiteSetWith("/?:@-._~!$&'()*+,;=");
    /** Distance between a lower-case ASCII letter and its upper-case form. */
    static final int caseDiff = ('a' - 'A');

    /**
     * Builds a white set containing the ASCII letters and digits plus every character of
     * {@code extraChars}.
     */
    private static BitSet whiteSetWith(String extraChars) {
        BitSet set = basicSet();
        for (int i = 0; i < extraChars.length(); i++) {
            set.set(extraChars.charAt(i));
        }
        return set;
    }

    /**
     * Creates a new set holding the characters that never need escaping: a-z, A-Z and 0-9.
     */
    private static BitSet basicSet() {
        BitSet basicSet = new BitSet(256);
        int i;
        for (i = 'a'; i <= 'z'; i++) {
            basicSet.set(i);
        }
        for (i = 'A'; i <= 'Z'; i++) {
            basicSet.set(i);
        }
        for (i = '0'; i <= '9'; i++) {
            basicSet.set(i);
        }
        return basicSet;
    }

    /**
     * Encodes a query parameter key or value.
     *
     * @param s       the raw text to encode
     * @param charset name of the charset used to turn escaped characters into bytes
     * @return the encoded text; {@code s} itself when nothing had to be escaped
     */
    public static String encodeQuery(String s, String charset) {
        return encodeQuery(s, Charset.forName(charset));
    }

    /**
     * Encodes a query parameter key or value. Spaces become {@code %20} and {@code +}
     * becomes {@code %2B}.
     *
     * @param s       the raw text to encode
     * @param charset charset used to turn escaped characters into bytes
     * @return the encoded text; {@code s} itself when nothing had to be escaped
     */
    public static String encodeQuery(String s, Charset charset) {
        return _encode(s, charset, queryWhiteSet);
    }

    /**
     * Decodes a query parameter key or value; {@code +} is decoded to a space.
     *
     * @param s       the encoded text
     * @param charset name of the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed escape sequence
     */
    public static String decodeQuery(String s, String charset) {
        return _decode(s, Charset.forName(charset), true);
    }

    /**
     * Decodes a query parameter key or value; {@code +} is decoded to a space.
     *
     * @param s       the encoded text
     * @param charset charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed escape sequence
     */
    public static String decodeQuery(String s, Charset charset) {
        return _decode(s, charset, true);
    }

    /**
     * Encodes a single URL path segment.
     *
     * @param s       the raw text to encode
     * @param charset name of the charset used to turn escaped characters into bytes
     * @return the encoded text; {@code s} itself when nothing had to be escaped
     */
    public static String encodePathSegment(String s, String charset) {
        return encodePathSegment(s, Charset.forName(charset));
    }

    /**
     * Encodes a single URL path segment. {@code /} and {@code ?} are escaped so the result
     * cannot introduce extra segments or start a query part.
     *
     * @param s       the raw text to encode
     * @param charset charset used to turn escaped characters into bytes
     * @return the encoded text; {@code s} itself when nothing had to be escaped
     */
    public static String encodePathSegment(String s, Charset charset) {
        return _encode(s, charset, pathSegmentWhiteSet);
    }

    /**
     * Decodes a URL path segment; {@code +} is kept as a literal plus sign.
     *
     * @param s       the encoded text
     * @param charset name of the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed escape sequence
     */
    public static String decodePathSegment(String s, String charset) {
        return _decode(s, Charset.forName(charset), false);
    }

    /**
     * Decodes a URL path segment; {@code +} is kept as a literal plus sign.
     *
     * @param s       the encoded text
     * @param charset charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed escape sequence
     */
    public static String decodePathSegment(String s, Charset charset) {
        return _decode(s, charset, false);
    }

    /**
     * Encodes a URL fragment.
     *
     * @param s       the raw text to encode
     * @param charset name of the charset used to turn escaped characters into bytes
     * @return the encoded text; {@code s} itself when nothing had to be escaped
     */
    public static String encodeFragment(String s, String charset) {
        return encodeFragment(s, Charset.forName(charset));
    }

    /**
     * Encodes a URL fragment. {@code /} and {@code ?} are left unescaped.
     *
     * @param s       the raw text to encode
     * @param charset charset used to turn escaped characters into bytes
     * @return the encoded text; {@code s} itself when nothing had to be escaped
     */
    public static String encodeFragment(String s, Charset charset) {
        return _encode(s, charset, fragmentWhiteSet);
    }

    /**
     * Decodes a URL fragment; {@code +} is kept as a literal plus sign.
     *
     * @param s       the encoded text
     * @param charset name of the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed escape sequence
     */
    public static String decodeFragment(String s, String charset) {
        return _decode(s, Charset.forName(charset), false);
    }

    /**
     * Decodes a URL fragment; {@code +} is kept as a literal plus sign.
     *
     * @param s       the encoded text
     * @param charset charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed escape sequence
     */
    public static String decodeFragment(String s, Charset charset) {
        return _decode(s, charset, false);
    }

    /**
     * Percent-encodes every character of {@code s} that is not in {@code whiteSet}.
     * Consecutive characters that need escaping are converted to bytes together so that
     * multi-char sequences (surrogate pairs) are encoded as one unit.
     *
     * @return the encoded text; {@code s} itself when nothing had to be escaped
     */
    private static String _encode(String s, Charset charset, BitSet whiteSet) {
        boolean needToChange = false;
        StringBuilder sb = new StringBuilder(s.length());
        CharArrayWriter charArrayWriter = new CharArrayWriter();
        for (int i = 0; i < s.length(); ) {
            int c = s.charAt(i);
            if (whiteSet.get(c)) {
                sb.append((char) c);
                i++;
                continue;
            }
            // Gather the whole run of characters that must be escaped.
            do {
                charArrayWriter.write(c);
                // Keep a high surrogate together with its low surrogate, if one follows.
                if (c >= 0xD800 && c <= 0xDBFF && (i + 1) < s.length()) {
                    int d = s.charAt(i + 1);
                    if (d >= 0xDC00 && d <= 0xDFFF) {
                        charArrayWriter.write(d);
                        i++;
                    }
                }
                i++;
            } while (i < s.length() && !whiteSet.get((c = s.charAt(i))));
            charArrayWriter.flush();
            byte[] ba = new String(charArrayWriter.toCharArray()).getBytes(charset);
            for (byte b : ba) {
                sb.append('%');
                sb.append(upperHexDigit((b >> 4) & 0xF));
                sb.append(upperHexDigit(b & 0xF));
            }
            charArrayWriter.reset();
            needToChange = true;
        }
        return (needToChange ? sb.toString() : s);
    }

    /** Returns the upper-case hexadecimal digit for a value in the range 0..15. */
    private static char upperHexDigit(int nibble) {
        char ch = Character.forDigit(nibble, 16);
        if (Character.isLetter(ch)) {
            ch -= caseDiff;
        }
        return ch;
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 if {@code ch} is not one. */
    private static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    }

    /**
     * Decodes percent-escapes in {@code s}.
     *
     * @param s           the encoded text
     * @param charset     charset used to interpret the escaped bytes
     * @param isQueryPart when {@code true}, {@code +} is decoded to a space; otherwise it is
     *                    kept as a literal plus sign
     * @return the decoded text; {@code s} itself when nothing had to be decoded
     * @throws IllegalArgumentException if an escape sequence is incomplete or does not
     *                                  consist of two hexadecimal digits
     */
    public static String _decode(String s, Charset charset, boolean isQueryPart) {
        boolean needToChange = false;
        int numChars = s.length();
        StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
        int i = 0;
        byte[] bytes = null;
        while (i < numChars) {
            char c = s.charAt(i);
            switch (c) {
                case '+':
                    if (isQueryPart) {
                        sb.append(' ');
                        needToChange = true;
                    } else {
                        sb.append('+');
                    }
                    i++;
                    break;
                case '%': {
                    // (numChars - i) / 3 is an upper bound for the number of remaining
                    // bytes, so the buffer allocated at the first escape fits all later ones.
                    if (bytes == null) {
                        bytes = new byte[(numChars - i) / 3];
                    }
                    int pos = 0;
                    // Decode a run of consecutive escapes as one byte sequence so that
                    // multi-byte characters are reassembled correctly.
                    while (((i + 2) < numChars) && (c == '%')) {
                        int hi = hexValue(s.charAt(i + 1));
                        int lo = hexValue(s.charAt(i + 2));
                        if (hi < 0 || lo < 0) {
                            throw new IllegalArgumentException(
                                    "Illegal hex characters in escape (%) pattern - \""
                                            + s.substring(i + 1, i + 3) + "\"");
                        }
                        bytes[pos++] = (byte) ((hi << 4) | lo);
                        i += 3;
                        if (i < numChars) {
                            c = s.charAt(i);
                        }
                    }
                    // A trailing, incomplete byte encoding such as "%x" is an error.
                    if ((i < numChars) && (c == '%')) {
                        throw new IllegalArgumentException("Incomplete trailing escape (%) pattern");
                    }
                    sb.append(new String(bytes, 0, pos, charset));
                    needToChange = true;
                    break;
                }
                default:
                    sb.append(c);
                    i++;
                    break;
            }
        }
        return (needToChange ? sb.toString() : s);
    }
}