All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fireflysource.net.http.common.codec.URIUtils Maven / Gradle / Ivy

There is a newer version: 5.0.2
Show newest version
package com.fireflysource.net.http.common.codec;

import com.fireflysource.common.object.TypeUtils;
import com.fireflysource.common.string.StringUtils;
import com.fireflysource.common.string.Utf8Appendable;
import com.fireflysource.common.string.Utf8StringBuilder;
import com.fireflysource.net.http.common.model.HostPort;

import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * URI Utility methods.
 * 

* This class assists with the decoding and encoding or HTTP URI's. * It differs from the java.net.URL class as it does not provide * communications ability, but it does assist with query string * formatting. *

*/ public class URIUtils implements Cloneable { public static final String SLASH = "/"; public static final String HTTP = "http"; public static final String HTTPS = "https"; // Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars public static final Charset __CHARSET = StandardCharsets.UTF_8; private URIUtils() { } /** * Encode a URI path. * This is the same encoding offered by URLEncoder, except that * the '/' character is not encoded. * * @param path The path the encode * @return The encoded path */ public static String encodePath(String path) { if (path == null || path.length() == 0) return path; StringBuilder buf = encodePath(null, path, 0); return buf == null ? path : buf.toString(); } /** * Encode a URI path. * * @param path The path the encode * @param buf StringBuilder to encode path into (or null) * @return The StringBuilder or null if no substitutions required. */ public static StringBuilder encodePath(StringBuilder buf, String path) { return encodePath(buf, path, 0); } /** * Encode a URI path. * * @param path The path the encode * @param buf StringBuilder to encode path into (or null) * @return The StringBuilder or null if no substitutions required. */ private static StringBuilder encodePath(StringBuilder buf, String path, int offset) { byte[] bytes = null; if (buf == null) { loop: for (int i = offset; i < path.length(); i++) { char c = path.charAt(i); switch (c) { case '%': case '?': case ';': case '#': case '"': case '\'': case '<': case '>': case ' ': case '[': case '\\': case ']': case '^': case '`': case '{': case '|': case '}': buf = new StringBuilder(path.length() * 2); break loop; default: if (c > 127) { bytes = path.getBytes(URIUtils.__CHARSET); buf = new StringBuilder(path.length() * 2); break loop; } } } if (buf == null) return null; } int i; loop: for (i = offset; i < path.length(); i++) { char c = path.charAt(i); switch (c) { case '%': buf.append("%25"); continue; case '?': buf.append("%3F"); continue; case ';': buf.append("%3B"); continue; case '#': buf.append("%23"); continue; case '"': buf.append("%22"); continue; case '\'': buf.append("%27"); continue; case '<': buf.append("%3C"); continue; case '>': buf.append("%3E"); continue; case ' ': buf.append("%20"); continue; case '[': buf.append("%5B"); continue; case '\\': buf.append("%5C"); continue; case ']': buf.append("%5D"); continue; case '^': buf.append("%5E"); continue; case '`': buf.append("%60"); continue; case '{': buf.append("%7B"); continue; case '|': buf.append("%7C"); continue; case '}': buf.append("%7D"); continue; default: if (c > 127) { bytes = path.getBytes(URIUtils.__CHARSET); break loop; } buf.append(c); } } if (bytes != null) { for (; i < bytes.length; i++) { byte c = bytes[i]; switch (c) { case '%': buf.append("%25"); continue; case '?': buf.append("%3F"); continue; case ';': buf.append("%3B"); continue; case '#': buf.append("%23"); continue; case '"': buf.append("%22"); continue; case '\'': buf.append("%27"); continue; case '<': buf.append("%3C"); continue; case '>': buf.append("%3E"); continue; case ' ': buf.append("%20"); continue; case '[': buf.append("%5B"); continue; case '\\': buf.append("%5C"); continue; case ']': buf.append("%5D"); continue; case '^': buf.append("%5E"); continue; case '`': buf.append("%60"); continue; case '{': buf.append("%7B"); continue; case '|': buf.append("%7C"); continue; case '}': buf.append("%7D"); continue; default: if (c < 0) { buf.append('%'); TypeUtils.toHex(c, buf); } else buf.append((char) c); } } } return buf; } /** * Encode a raw URI String and convert any raw spaces to * their "%20" equivalent. * * @param str input raw string * @return output with spaces converted to "%20" */ public static String encodeSpaces(String str) { return StringUtils.replaceStr(str, " ", "%20"); } /** * Encode a raw String and convert any specific characters to their URI encoded equivalent. * * @param str input raw string * @param charsToEncode the list of raw characters that need to be encoded (if encountered) * @return output with specified characters encoded. */ @SuppressWarnings("Duplicates") public static String encodeSpecific(String str, String charsToEncode) { if ((str == null) || (str.length() == 0)) return null; if ((charsToEncode == null) || (charsToEncode.length() == 0)) return str; char[] find = charsToEncode.toCharArray(); int len = str.length(); StringBuilder ret = new StringBuilder((int) (len * 0.20d)); for (int i = 0; i < len; i++) { char c = str.charAt(i); boolean escaped = false; for (char f : find) { if (c == f) { escaped = true; ret.append('%'); int d = 0xf & ((0xF0 & c) >> 4); ret.append((char) ((d > 9 ? ('A' - 10) : '0') + d)); d = 0xf & c; ret.append((char) ((d > 9 ? ('A' - 10) : '0') + d)); break; } } if (!escaped) { ret.append(c); } } return ret.toString(); } /** * Decode a raw String and convert any specific URI encoded sequences into characters. * * @param str input raw string * @param charsToDecode the list of raw characters that need to be decoded (if encountered), leaving all other encoded sequences alone. * @return output with specified characters decoded. */ @SuppressWarnings("Duplicates") public static String decodeSpecific(String str, String charsToDecode) { if ((str == null) || (str.length() == 0)) return null; if ((charsToDecode == null) || (charsToDecode.length() == 0)) return str; int idx = str.indexOf('%'); if (idx == -1) { // no hits return str; } char[] find = charsToDecode.toCharArray(); int len = str.length(); Utf8StringBuilder ret = new Utf8StringBuilder(len); ret.append(str, 0, idx); for (int i = idx; i < len; i++) { char c = str.charAt(i); switch (c) { case '%': if ((i + 2) < len) { char u = str.charAt(i + 1); char l = str.charAt(i + 2); char result = (char) (0xff & (TypeUtils.convertHexDigit(u) * 16 + TypeUtils.convertHexDigit(l))); boolean decoded = false; for (char f : find) { if (f == result) { ret.append(result); decoded = true; break; } } if (decoded) { i += 2; } else { ret.append(c); } } else { throw new IllegalArgumentException("Bad URI % encoding"); } break; default: ret.append(c); break; } } return ret.toString(); } /** * Encode a URI path. * * @param path The path the encode * @param buf StringBuilder to encode path into (or null) * @param encode String of characters to encode. % is always encoded. * @return The StringBuilder or null if no substitutions required. */ public static StringBuilder encodeString(StringBuilder buf, String path, String encode) { if (buf == null) { for (int i = 0; i < path.length(); i++) { char c = path.charAt(i); if (c == '%' || encode.indexOf(c) >= 0) { buf = new StringBuilder(path.length() << 1); break; } } if (buf == null) return null; } for (int i = 0; i < path.length(); i++) { char c = path.charAt(i); if (c == '%' || encode.indexOf(c) >= 0) { buf.append('%'); StringUtils.append(buf, (byte) (0xff & c), 16); } else buf.append(c); } return buf; } /* Decode a URI path and strip parameters */ public static String decodePath(String path) { return decodePath(path, 0, path.length()); } /* Decode a URI path and strip parameters of UTF-8 path */ public static String decodePath(String path, int offset, int length) { try { Utf8StringBuilder builder = null; int end = offset + length; for (int i = offset; i < end; i++) { char c = path.charAt(i); switch (c) { case '%': if (builder == null) { builder = new Utf8StringBuilder(path.length()); builder.append(path, offset, i - offset); } // lenient percent decoding if (i >= end) { // [LENIENT] a percent sign at end of string. builder.append('%'); i = end; } else if (end > (i + 1)) { char type = path.charAt(i + 1); if (type == 'u') { // We have a possible (deprecated) microsoft unicode code point "%u####" // - not recommended to use as it's limited to 2 bytes. if ((i + 5) >= end) { // [LENIENT] we have a partial "%u####" at the end of a string. builder.append(path, i, (end - i)); i = end; } else { // this seems wrong, as we are casting to a char, but that's the known // limitation of this deprecated encoding (only 2 bytes allowed) if (StringUtils.isHex(path, i + 2, 4)) { builder.append((char) (0xffff & TypeUtils.parseInt(path, i + 2, 4, 16))); i += 5; } else { // [LENIENT] copy the "%u" as-is. builder.append(path, i, 2); i += 1; } } } else if (end > (i + 2)) { // we have a possible "%##" encoding if (StringUtils.isHex(path, i + 1, 2)) { builder.append((byte) TypeUtils.parseInt(path, i + 1, 2, 16)); i += 2; } else { builder.append(path, i, 3); i += 2; } } else { // [LENIENT] incomplete "%##" sequence at end of string builder.append(path, i, (end - i)); i = end; } } else { // [LENIENT] the "%" at the end of the string builder.append(path, i, (end - i)); i = end; } break; case ';': if (builder == null) { builder = new Utf8StringBuilder(path.length()); builder.append(path, offset, i - offset); } while (++i < end) { if (path.charAt(i) == '/') { builder.append('/'); break; } } break; default: if (builder != null) builder.append(c); break; } } if (builder != null) return builder.toString(); if (offset == 0 && length == path.length()) return path; return path.substring(offset, end); } catch (Utf8Appendable.NotUtf8Exception e) { return decodeISO88591Path(path, offset, length); } } /* Decode a URI path and strip parameters of ISO-8859-1 path */ private static String decodeISO88591Path(String path, int offset, int length) { StringBuilder builder = null; int end = offset + length; for (int i = offset; i < end; i++) { char c = path.charAt(i); switch (c) { case '%': if (builder == null) { builder = new StringBuilder(path.length()); builder.append(path, offset, i - offset); } if ((i + 2) < end) { char u = path.charAt(i + 1); if (u == 'u') { // TODO this is wrong. This is a codepoint not a char builder.append((char) (0xffff & TypeUtils.parseInt(path, i + 2, 4, 16))); i += 5; } else { builder.append((byte) (0xff & (TypeUtils.convertHexDigit(u) * 16 + TypeUtils.convertHexDigit(path.charAt(i + 2))))); i += 2; } } else { throw new IllegalArgumentException(); } break; case ';': if (builder == null) { builder = new StringBuilder(path.length()); builder.append(path, offset, i - offset); } while (++i < end) { if (path.charAt(i) == '/') { builder.append('/'); break; } } break; default: if (builder != null) builder.append(c); break; } } if (builder != null) return builder.toString(); if (offset == 0 && length == path.length()) return path; return path.substring(offset, end); } /** * Add two encoded URI path segments. * Handles null and empty paths, path and query params * (eg ?a=b or ;JSESSIONID=xxx) and avoids duplicate '/' * * @param p1 URI path segment (should be encoded) * @param p2 URI path segment (should be encoded) * @return Legally combined path segments. */ public static String addEncodedPaths(String p1, String p2) { if (p1 == null || p1.length() == 0) { if (p1 != null && p2 == null) return p1; return p2; } if (p2 == null || p2.length() == 0) return p1; int split = p1.indexOf(';'); if (split < 0) split = p1.indexOf('?'); if (split == 0) return p2 + p1; if (split < 0) split = p1.length(); StringBuilder buf = new StringBuilder(p1.length() + p2.length() + 2); buf.append(p1); if (buf.charAt(split - 1) == '/') { if (p2.startsWith(URIUtils.SLASH)) { buf.deleteCharAt(split - 1); buf.insert(split - 1, p2); } else buf.insert(split, p2); } else { if (p2.startsWith(URIUtils.SLASH)) buf.insert(split, p2); else { buf.insert(split, '/'); buf.insert(split + 1, p2); } } return buf.toString(); } /** * Add two Decoded URI path segments. * Handles null and empty paths. Path and query params (eg ?a=b or * ;JSESSIONID=xxx) are not handled * * @param p1 URI path segment (should be decoded) * @param p2 URI path segment (should be decoded) * @return Legally combined path segments. */ public static String addPaths(String p1, String p2) { if (p1 == null || p1.length() == 0) { if (p1 != null && p2 == null) return p1; return p2; } if (p2 == null || p2.length() == 0) return p1; boolean p1EndsWithSlash = p1.endsWith(SLASH); boolean p2StartsWithSlash = p2.startsWith(SLASH); if (p1EndsWithSlash && p2StartsWithSlash) { if (p2.length() == 1) return p1; if (p1.length() == 1) return p2; } StringBuilder buf = new StringBuilder(p1.length() + p2.length() + 2); buf.append(p1); if (p1.endsWith(SLASH)) { if (p2.startsWith(SLASH)) buf.setLength(buf.length() - 1); } else { if (!p2.startsWith(SLASH)) buf.append(SLASH); } buf.append(p2); return buf.toString(); } /** * Return the parent Path. * Treat a URI like a directory path and return the parent directory. * * @param p the path to return a parent reference to * @return the parent path of the URI */ public static String parentPath(String p) { if (p == null || URIUtils.SLASH.equals(p)) return null; int slash = p.lastIndexOf('/', p.length() - 2); if (slash >= 0) return p.substring(0, slash + 1); return null; } /** * Convert a decoded path to a canonical form. *

* All instances of "." and ".." are factored out. *

*

* Null is returned if the path tries to .. above its root. *

* * @param path the path to convert, decoded, with path separators '/' and no queries. * @return the canonical path, or null if path traversal above root. */ public static String canonicalPath(String path) { if (path == null || path.isEmpty()) return path; boolean slash = true; int end = path.length(); int i = 0; loop: while (i < end) { char c = path.charAt(i); switch (c) { case '/': slash = true; break; case '.': if (slash) break loop; slash = false; break; default: slash = false; } i++; } if (i == end) return path; StringBuilder canonical = new StringBuilder(path.length()); canonical.append(path, 0, i); int dots = 1; i++; while (i <= end) { char c = i < end ? path.charAt(i) : '\0'; switch (c) { case '\0': case '/': switch (dots) { case 0: if (c != '\0') canonical.append(c); break; case 1: break; case 2: if (canonical.length() < 2) return null; canonical.setLength(canonical.length() - 1); canonical.setLength(canonical.lastIndexOf("/") + 1); break; default: while (dots-- > 0) { canonical.append('.'); } if (c != '\0') canonical.append(c); } slash = true; dots = 0; break; case '.': if (dots > 0) dots++; else if (slash) dots = 1; else canonical.append('.'); slash = false; break; default: while (dots-- > 0) { canonical.append('.'); } canonical.append(c); dots = 0; slash = false; } i++; } return canonical.toString(); } /** * Convert a path to a cananonical form. *

* All instances of "." and ".." are factored out. *

*

* Null is returned if the path tries to .. above its root. *

* * @param path the path to convert (expects URI/URL form, encoded, and with path separators '/') * @return the canonical path, or null if path traversal above root. */ public static String canonicalEncodedPath(String path) { if (path == null || path.isEmpty()) return path; boolean slash = true; int end = path.length(); int i = 0; loop: while (i < end) { char c = path.charAt(i); switch (c) { case '/': slash = true; break; case '.': if (slash) break loop; slash = false; break; case '?': return path; default: slash = false; } i++; } if (i == end) return path; StringBuilder canonical = new StringBuilder(path.length()); canonical.append(path, 0, i); int dots = 1; i++; while (i <= end) { char c = i < end ? path.charAt(i) : '\0'; switch (c) { case '\0': case '/': case '?': switch (dots) { case 0: if (c != '\0') canonical.append(c); break; case 1: if (c == '?') canonical.append(c); break; case 2: if (canonical.length() < 2) return null; canonical.setLength(canonical.length() - 1); canonical.setLength(canonical.lastIndexOf("/") + 1); if (c == '?') canonical.append(c); break; default: while (dots-- > 0) { canonical.append('.'); } if (c != '\0') canonical.append(c); } slash = true; dots = 0; break; case '.': if (dots > 0) dots++; else if (slash) dots = 1; else canonical.append('.'); slash = false; break; default: while (dots-- > 0) { canonical.append('.'); } canonical.append(c); dots = 0; slash = false; } i++; } return canonical.toString(); } /** * Convert a path to a compact form. * All instances of "//" and "///" etc. are factored out to single "/" * * @param path the path to compact * @return the compacted path */ public static String compactPath(String path) { if (path == null || path.length() == 0) return path; int state = 0; int end = path.length(); int i = 0; loop: while (i < end) { char c = path.charAt(i); switch (c) { case '?': return path; case '/': state++; if (state == 2) break loop; break; default: state = 0; } i++; } if (state < 2) return path; StringBuilder buf = new StringBuilder(path.length()); buf.append(path, 0, i); loop2: while (i < end) { char c = path.charAt(i); switch (c) { case '?': buf.append(path, i, end); break loop2; case '/': if (state++ == 0) buf.append(c); break; default: state = 0; buf.append(c); } i++; } return buf.toString(); } /** * @param uri URI * @return True if the uri has a scheme */ public static boolean hasScheme(String uri) { for (int i = 0; i < uri.length(); i++) { char c = uri.charAt(i); if (c == ':') { return true; } if (!(c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || (i > 0 && (c >= '0' && c <= '9' || c == '.' || c == '+' || c == '-')))) { break; } } return false; } /** * Create a new URI from the arguments, handling IPv6 host encoding and default ports * * @param scheme the URI scheme * @param server the URI server * @param port the URI port * @param path the URI path * @param query the URI query * @return A String URI */ public static String newURI(String scheme, String server, int port, String path, String query) { StringBuilder builder = newURIBuilder(scheme, server, port); builder.append(path); if (query != null && query.length() > 0) builder.append('?').append(query); return builder.toString(); } /** * Create a new URI StringBuilder from the arguments, handling IPv6 host encoding and default ports * * @param scheme the URI scheme * @param server the URI server * @param port the URI port * @return a StringBuilder containing URI prefix */ public static StringBuilder newURIBuilder(String scheme, String server, int port) { StringBuilder builder = new StringBuilder(); appendSchemeHostPort(builder, scheme, server, port); return builder; } /** * Append scheme, host and port URI prefix, handling IPv6 address encoding and default ports * * @param url StringBuilder to append to * @param scheme the URI scheme * @param server the URI server * @param port the URI port */ public static void appendSchemeHostPort(StringBuilder url, String scheme, String server, int port) { url.append(scheme).append("://").append(HostPort.normalizeHost(server)); if (port > 0) { switch (scheme) { case "http": if (port != 80) url.append(':').append(port); break; case "https": if (port != 443) url.append(':').append(port); break; default: url.append(':').append(port); } } } public static boolean equalsIgnoreEncodings(String uriA, String uriB) { int lenA = uriA.length(); int lenB = uriB.length(); int a = 0; int b = 0; while (a < lenA && b < lenB) { int oa = uriA.charAt(a++); int ca = oa; if (ca == '%') { ca = lenientPercentDecode(uriA, a); if (ca == (-1)) { ca = '%'; } else { a += 2; } } int ob = uriB.charAt(b++); int cb = ob; if (cb == '%') { cb = lenientPercentDecode(uriB, b); if (cb == (-1)) { cb = '%'; } else { b += 2; } } // Don't match on encoded slash if (ca == '/' && oa != ob) return false; if (ca != cb) return false; } return a == lenA && b == lenB; } private static int lenientPercentDecode(String str, int offset) { if (offset >= str.length()) return -1; if (StringUtils.isHex(str, offset, 2)) { return TypeUtils.parseInt(str, offset, 2, 16); } else { return -1; } } public static boolean equalsIgnoreEncodings(URI uriA, URI uriB) { if (uriA.equals(uriB)) return true; if (uriA.getScheme() == null) { if (uriB.getScheme() != null) return false; } else if (!uriA.getScheme().equalsIgnoreCase(uriB.getScheme())) return false; if ("jar".equalsIgnoreCase(uriA.getScheme())) { // at this point we know that both uri's are "jar:" URI uriAssp = URI.create(uriA.getSchemeSpecificPart()); URI uriBssp = URI.create(uriB.getSchemeSpecificPart()); return equalsIgnoreEncodings(uriAssp, uriBssp); } if (uriA.getAuthority() == null) { if (uriB.getAuthority() != null) return false; } else if (!uriA.getAuthority().equals(uriB.getAuthority())) return false; return equalsIgnoreEncodings(uriA.getPath(), uriB.getPath()); } /** * @param uri A URI to add the path to * @param path A decoded path element * @return URI with path added. */ public static URI addPath(URI uri, String path) { String base = uri.toASCIIString(); StringBuilder buf = new StringBuilder(base.length() + path.length() * 3); buf.append(base); if (buf.charAt(base.length() - 1) != '/') buf.append('/'); int offset = path.charAt(0) == '/' ? 1 : 0; encodePath(buf, path, offset); return URI.create(buf.toString()); } public static URI getJarSource(URI uri) { try { if (!"jar".equals(uri.getScheme())) return uri; // Get SSP (retaining encoded form) String s = uri.getRawSchemeSpecificPart(); int bangSlash = s.indexOf("!/"); if (bangSlash >= 0) s = s.substring(0, bangSlash); return new URI(s); } catch (URISyntaxException e) { throw new IllegalArgumentException(e); } } public static String getJarSource(String uri) { if (!uri.startsWith("jar:")) return uri; int bangSlash = uri.indexOf("!/"); return (bangSlash >= 0) ? uri.substring(4, bangSlash) : uri.substring(4); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy