All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.dongliu.commons.http.UrlCoder Maven / Gradle / Ivy

The newest version!
package net.dongliu.commons.http;

import java.io.CharArrayWriter;
import java.nio.charset.Charset;
import java.util.BitSet;

/**
 * Utility methods for encoding and decoding the individual parts of a URL.
 *
 * <p>The path segment, query and fragment parts are escaped differently, following RFC 3986:
 * <ul>
 *   <li>In a path segment a space has to be encoded as "%20" (never as "+"), while a literal
 *       "+" may be left unencoded.</li>
 *   <li>In the query part a space may be encoded as either "+" (for backwards compatibility)
 *       or "%20", while a literal "+" has to be escaped as "%2B". This class always writes
 *       "%20" when encoding and accepts both forms when decoding a query.</li>
 *   <li>"?" and "/" are allowed unescaped anywhere within a query part.</li>
 *   <li>"=" is allowed unescaped within a path parameter or query parameter value, and within
 *       a path segment. Because the query methods here encode a single key or value, they
 *       still escape "=" and "&amp;".</li>
 *   <li>{@code :@-._~!$&'()*+,;=} are allowed unescaped anywhere within a path segment.</li>
 *   <li>{@code /?:@-._~!$&'()*+,;=} are allowed unescaped anywhere within a fragment.</li>
 * </ul>
 *
 * <p>For {@code https://bob:bobby@www.lunatech.com:8080/file;p=1?q=2#third} the parts are:
 * <pre>
 *     Scheme            https
 *     User              bob
 *     Password          bobby
 *     Host address      www.lunatech.com
 *     Port              8080
 *     Path              /file
 *     Path parameters   p=1
 *     Query parameters  q=2
 *     Fragment          third
 * </pre>
 *
 * <p>{@code java.net.URLEncoder} converts a String to the application/x-www-form-urlencoded
 * MIME format, which is not suitable for encoding URLs; use the methods of this class instead.
 */
public class UrlCoder {

    /** Characters left unescaped in a query key or value: ASCII letters, digits and {@code -_.*?/}. */
    static final BitSet queryWhiteSet = whiteSet("-_.*?/");

    /** Characters left unescaped in a path segment: ASCII letters, digits and {@code :@-._~!$&'()*+,;=}. */
    static final BitSet pathSegmentWhiteSet = whiteSet(":@-._~!$&'()*+,;=");

    /** Characters left unescaped in a fragment: ASCII letters, digits and {@code /?:@-._~!$&'()*+,;=}. */
    static final BitSet fragmentWhiteSet = whiteSet("/?:@-._~!$&'()*+,;=");

    /** Distance between a lower-case ASCII letter and its upper-case form; no longer used here, kept for compatibility. */
    static final int caseDiff = ('a' - 'A');

    /** Upper-case hex digits used when writing "%XX" escapes. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Builds a white set holding the ASCII letters and digits plus every character of {@code extra}.
     */
    private static BitSet whiteSet(String extra) {
        BitSet set = basicSet();
        for (int k = 0; k < extra.length(); k++) {
            set.set(extra.charAt(k));
        }
        return set;
    }

    /**
     * Creates a new bit set containing only 'a'-'z', 'A'-'Z' and '0'-'9'.
     */
    private static BitSet basicSet() {
        BitSet basicSet = new BitSet(256);
        basicSet.set('a', 'z' + 1);
        basicSet.set('A', 'Z' + 1);
        basicSet.set('0', '9' + 1);
        return basicSet;
    }

    /**
     * Encodes a query parameter key or value.
     *
     * @param s       the text to encode
     * @param charset name of the charset used to turn escaped characters into bytes
     * @return the encoded text, or {@code s} itself when nothing had to be escaped
     */
    public static String encodeQuery(String s, String charset) {
        return encodeQuery(s, Charset.forName(charset));
    }

    /**
     * Encodes a query parameter key or value.
     *
     * @param s       the text to encode
     * @param charset the charset used to turn escaped characters into bytes
     * @return the encoded text, or {@code s} itself when nothing had to be escaped
     */
    public static String encodeQuery(String s, Charset charset) {
        return _encode(s, charset, queryWhiteSet);
    }

    /**
     * Decodes a query parameter key or value; "+" is decoded to a space.
     *
     * @param s       the encoded text
     * @param charset name of the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed "%" escape
     */
    public static String decodeQuery(String s, String charset) {
        return _decode(s, Charset.forName(charset), true);
    }

    /**
     * Decodes a query parameter key or value; "+" is decoded to a space.
     *
     * @param s       the encoded text
     * @param charset the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed "%" escape
     */
    public static String decodeQuery(String s, Charset charset) {
        return _decode(s, charset, true);
    }

    /**
     * Encodes a single URL path segment; "/" and "?" are escaped.
     *
     * @param s       the text to encode
     * @param charset name of the charset used to turn escaped characters into bytes
     * @return the encoded text, or {@code s} itself when nothing had to be escaped
     */
    public static String encodePathSegment(String s, String charset) {
        return encodePathSegment(s, Charset.forName(charset));
    }

    /**
     * Encodes a single URL path segment; "/" and "?" are escaped.
     *
     * @param s       the text to encode
     * @param charset the charset used to turn escaped characters into bytes
     * @return the encoded text, or {@code s} itself when nothing had to be escaped
     */
    public static String encodePathSegment(String s, Charset charset) {
        return _encode(s, charset, pathSegmentWhiteSet);
    }

    /**
     * Decodes a URL path segment; "+" is kept as a literal plus sign.
     *
     * @param s       the encoded text
     * @param charset name of the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed "%" escape
     */
    public static String decodePathSegment(String s, String charset) {
        return _decode(s, Charset.forName(charset), false);
    }

    /**
     * Decodes a URL path segment; "+" is kept as a literal plus sign.
     *
     * @param s       the encoded text
     * @param charset the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed "%" escape
     */
    public static String decodePathSegment(String s, Charset charset) {
        return _decode(s, charset, false);
    }

    /**
     * Encodes a URL fragment; "/" and "?" are left unescaped.
     *
     * @param s       the text to encode
     * @param charset name of the charset used to turn escaped characters into bytes
     * @return the encoded text, or {@code s} itself when nothing had to be escaped
     */
    public static String encodeFragment(String s, String charset) {
        return encodeFragment(s, Charset.forName(charset));
    }

    /**
     * Encodes a URL fragment; "/" and "?" are left unescaped.
     *
     * @param s       the text to encode
     * @param charset the charset used to turn escaped characters into bytes
     * @return the encoded text, or {@code s} itself when nothing had to be escaped
     */
    public static String encodeFragment(String s, Charset charset) {
        return _encode(s, charset, fragmentWhiteSet);
    }

    /**
     * Decodes a URL fragment; "+" is kept as a literal plus sign.
     *
     * @param s       the encoded text
     * @param charset name of the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed "%" escape
     */
    public static String decodeFragment(String s, String charset) {
        return _decode(s, Charset.forName(charset), false);
    }

    /**
     * Decodes a URL fragment; "+" is kept as a literal plus sign.
     *
     * @param s       the encoded text
     * @param charset the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code s} contains a malformed "%" escape
     */
    public static String decodeFragment(String s, Charset charset) {
        return _decode(s, charset, false);
    }

    /**
     * Percent-encodes every character of {@code s} that is not in {@code whiteSet}.
     *
     * <p>Consecutive characters that need escaping are converted to bytes together, so a
     * surrogate pair is always handed to the charset as one unit.
     *
     * @return the encoded text, or {@code s} itself when nothing had to be escaped
     */
    private static String _encode(String s, Charset charset, BitSet whiteSet) {
        int length = s.length();
        StringBuilder sb = new StringBuilder(length);
        boolean needToChange = false;
        int i = 0;
        while (i < length) {
            char c = s.charAt(i);
            if (whiteSet.get(c)) {
                sb.append(c);
                i++;
                continue;
            }
            // Collect the whole run of characters that must be escaped.
            int runStart = i;
            while (i < length && !whiteSet.get(s.charAt(i))) {
                i++;
            }
            for (byte b : s.substring(runStart, i).getBytes(charset)) {
                sb.append('%');
                sb.append(HEX_DIGITS[(b >> 4) & 0xF]);
                sb.append(HEX_DIGITS[b & 0xF]);
            }
            needToChange = true;
        }
        return needToChange ? sb.toString() : s;
    }

    /**
     * Decodes "%XX" escapes in {@code s}, interpreting each run of consecutive escapes as bytes
     * of {@code charset}.
     *
     * @param s           the encoded text
     * @param charset     the charset used to interpret the escaped bytes
     * @param isQueryPart when {@code true}, "+" is decoded to a space; otherwise it is kept as "+"
     * @return the decoded text, or {@code s} itself when nothing had to be decoded
     * @throws IllegalArgumentException if a "%" is not followed by exactly two ASCII hex digits
     */
    public static String _decode(String s, Charset charset, boolean isQueryPart) {
        int numChars = s.length();
        boolean needToChange = false;
        StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
        byte[] bytes = null;
        int i = 0;
        while (i < numChars) {
            char c = s.charAt(i);
            if (c == '+') {
                if (isQueryPart) {
                    sb.append(' ');
                    needToChange = true;
                } else {
                    sb.append('+');
                }
                i++;
            } else if (c == '%') {
                // (numChars - i) / 3 is an upper bound for the number of remaining bytes,
                // so the buffer allocated at the first escape fits every later run too.
                if (bytes == null) {
                    bytes = new byte[(numChars - i) / 3];
                }
                int pos = 0;
                while (i < numChars && s.charAt(i) == '%') {
                    if (i + 2 >= numChars) {
                        throw new IllegalArgumentException("Incomplete trailing escape (%) pattern");
                    }
                    int high = hexValue(s.charAt(i + 1));
                    int low = hexValue(s.charAt(i + 2));
                    if (high < 0 || low < 0) {
                        throw new IllegalArgumentException(
                                "Illegal hex characters in escape (%) pattern - \""
                                        + s.substring(i, i + 3) + "\"");
                    }
                    bytes[pos++] = (byte) ((high << 4) | low);
                    i += 3;
                }
                sb.append(new String(bytes, 0, pos, charset));
                needToChange = true;
            } else {
                sb.append(c);
                i++;
            }
        }
        return needToChange ? sb.toString() : s;
    }

    /**
     * Returns the value of an ASCII hex digit, or -1 when {@code c} is not one.
     * Signs and non-ASCII digits are rejected on purpose.
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy