All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.firefly.utils.lang.URIUtils Maven / Gradle / Ivy

There is a newer version: 5.0.2
Show newest version
package com.firefly.utils.lang;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import com.firefly.utils.StringUtils;
import com.firefly.utils.lang.Utf8Appendable.NotUtf8Exception;

/**
 * URI Utility methods.
 * 

* This class assists with the decoding and encoding or HTTP URI's. It differs * from the java.net.URL class as it does not provide communications ability, * but it does assist with query string formatting. *

*

* UTF-8 encoding is used by default for % encoded characters. *

* */ public class URIUtils implements Cloneable { public static final String SLASH = "/"; public static final String HTTP = "http"; public static final String HTTP_COLON = "http:"; public static final String HTTPS = "https"; public static final String HTTPS_COLON = "https:"; // Use UTF-8 as per // http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars public static final Charset __CHARSET = StandardCharsets.UTF_8; private URIUtils() { } /** * Encode a URI path. This is the same encoding offered by URLEncoder, * except that the '/' character is not encoded. * * @param path * The path the encode * @return The encoded path */ public static String encodePath(String path) { if (path == null || path.length() == 0) return path; StringBuilder buf = encodePath(null, path); return buf == null ? path : buf.toString(); } /** * Encode a URI path. * * @param path * The path the encode * @param buf * StringBuilder to encode path into (or null) * @return The StringBuilder or null if no substitutions required. */ public static StringBuilder encodePath(StringBuilder buf, String path) { byte[] bytes = null; if (buf == null) { loop: for (int i = 0; i < path.length(); i++) { char c = path.charAt(i); switch (c) { case '%': case '?': case ';': case '#': case '\'': case '"': case '<': case '>': case ' ': case '[': case ']': buf = new StringBuilder(path.length() * 2); break loop; default: if (c > 127) { bytes = path.getBytes(URIUtils.__CHARSET); buf = new StringBuilder(path.length() * 2); break loop; } } } if (buf == null) return null; } if (bytes != null) { for (int i = 0; i < bytes.length; i++) { byte c = bytes[i]; switch (c) { case '%': buf.append("%25"); continue; case '?': buf.append("%3F"); continue; case ';': buf.append("%3B"); continue; case '#': buf.append("%23"); continue; case '"': buf.append("%22"); continue; case '\'': buf.append("%27"); continue; case '<': buf.append("%3C"); continue; case '>': buf.append("%3E"); continue; case ' ': buf.append("%20"); continue; case '[': buf.append("%5B"); continue; case ']': buf.append("%5D"); continue; default: if (c < 0) { buf.append('%'); TypeUtils.toHex(c, buf); } else buf.append((char) c); continue; } } } else { for (int i = 0; i < path.length(); i++) { char c = path.charAt(i); switch (c) { case '%': buf.append("%25"); continue; case '?': buf.append("%3F"); continue; case ';': buf.append("%3B"); continue; case '#': buf.append("%23"); continue; case '"': buf.append("%22"); continue; case '\'': buf.append("%27"); continue; case '<': buf.append("%3C"); continue; case '>': buf.append("%3E"); continue; case ' ': buf.append("%20"); continue; case '[': buf.append("%5B"); continue; case ']': buf.append("%5D"); continue; default: buf.append(c); continue; } } } return buf; } /** * Encode a URI path. * * @param path * The path the encode * @param buf * StringBuilder to encode path into (or null) * @param encode * String of characters to encode. % is always encoded. * @return The StringBuilder or null if no substitutions required. */ public static StringBuilder encodeString(StringBuilder buf, String path, String encode) { if (buf == null) { loop: for (int i = 0; i < path.length(); i++) { char c = path.charAt(i); if (c == '%' || encode.indexOf(c) >= 0) { buf = new StringBuilder(path.length() << 1); break loop; } } if (buf == null) return null; } synchronized (buf) { for (int i = 0; i < path.length(); i++) { char c = path.charAt(i); if (c == '%' || encode.indexOf(c) >= 0) { buf.append('%'); StringUtils.append(buf, (byte) (0xff & c), 16); } else buf.append(c); } } return buf; } /* * Decode a URI path and strip parameters */ public static String decodePath(String path) { return decodePath(path, 0, path.length()); } /* * Decode a URI path and strip parameters of UTF-8 path */ public static String decodePath(String path, int offset, int length) { try { Utf8StringBuilder builder = null; int end = offset + length; for (int i = offset; i < end; i++) { char c = path.charAt(i); switch (c) { case '%': if (builder == null) { builder = new Utf8StringBuilder(path.length()); builder.append(path, offset, i - offset); } if ((i + 2) < end) { char u = path.charAt(i + 1); if (u == 'u') { // TODO this is wrong. This is a codepoint not a // char builder.append((char) (0xffff & TypeUtils.parseInt(path, i + 2, 4, 16))); i += 5; } else { builder.append((byte) (0xff & (TypeUtils.convertHexDigit(u) * 16 + TypeUtils.convertHexDigit(path.charAt(i + 2))))); i += 2; } } else { throw new IllegalArgumentException("Bad URI % encoding"); } break; case ';': if (builder == null) { builder = new Utf8StringBuilder(path.length()); builder.append(path, offset, i - offset); } while (++i < end) { if (path.charAt(i) == '/') { builder.append('/'); break; } } break; default: if (builder != null) builder.append(c); break; } } if (builder != null) return builder.toString(); if (offset == 0 && length == path.length()) return path; return path.substring(offset, end); } catch (NotUtf8Exception e) { return decodeISO88591Path(path, offset, length); } } /* * Decode a URI path and strip parameters of ISO-8859-1 path */ private static String decodeISO88591Path(String path, int offset, int length) { StringBuilder builder = null; int end = offset + length; for (int i = offset; i < end; i++) { char c = path.charAt(i); switch (c) { case '%': if (builder == null) { builder = new StringBuilder(path.length()); builder.append(path, offset, i - offset); } if ((i + 2) < end) { char u = path.charAt(i + 1); if (u == 'u') { // TODO this is wrong. This is a codepoint not a char builder.append((char) (0xffff & TypeUtils.parseInt(path, i + 2, 4, 16))); i += 5; } else { builder.append((byte) (0xff & (TypeUtils.convertHexDigit(u) * 16 + TypeUtils.convertHexDigit(path.charAt(i + 2))))); i += 2; } } else { throw new IllegalArgumentException(); } break; case ';': if (builder == null) { builder = new StringBuilder(path.length()); builder.append(path, offset, i - offset); } while (++i < end) { if (path.charAt(i) == '/') { builder.append('/'); break; } } break; default: if (builder != null) builder.append(c); break; } } if (builder != null) return builder.toString(); if (offset == 0 && length == path.length()) return path; return path.substring(offset, end); } /** * Add two URI path segments. Handles null and empty paths, path and query * params (eg ?a=b or ;JSESSIONID=xxx) and avoids duplicate '/' * * @param p1 * URI path segment (should be encoded) * @param p2 * URI path segment (should be encoded) * @return Legally combined path segments. */ public static String addPaths(String p1, String p2) { if (p1 == null || p1.length() == 0) { if (p1 != null && p2 == null) return p1; return p2; } if (p2 == null || p2.length() == 0) return p1; int split = p1.indexOf(';'); if (split < 0) split = p1.indexOf('?'); if (split == 0) return p2 + p1; if (split < 0) split = p1.length(); StringBuilder buf = new StringBuilder(p1.length() + p2.length() + 2); buf.append(p1); if (buf.charAt(split - 1) == '/') { if (p2.startsWith(URIUtils.SLASH)) { buf.deleteCharAt(split - 1); buf.insert(split - 1, p2); } else buf.insert(split, p2); } else { if (p2.startsWith(URIUtils.SLASH)) buf.insert(split, p2); else { buf.insert(split, '/'); buf.insert(split + 1, p2); } } return buf.toString(); } /** * Return the parent Path. Treat a URI like a directory path and return the * parent directory. * * @param p * the path to return a parent reference to * @return the parent path of the URI */ public static String parentPath(String p) { if (p == null || URIUtils.SLASH.equals(p)) return null; int slash = p.lastIndexOf('/', p.length() - 2); if (slash >= 0) return p.substring(0, slash + 1); return null; } /** * Convert a path to a cananonical form. All instances of "." and ".." are * factored out. Null is returned if the path tries to .. above its root. * * @param path * the path to convert * @return path or null. */ public static String canonicalPath(String path) { if (path == null || path.length() == 0) return path; int end = path.length(); int start = path.lastIndexOf('/', end); search: while (end > 0) { switch (end - start) { case 2: // possible single dot if (path.charAt(start + 1) != '.') break; break search; case 3: // possible double dot if (path.charAt(start + 1) != '.' || path.charAt(start + 2) != '.') break; break search; } end = start; start = path.lastIndexOf('/', end - 1); } // If we have checked the entire string if (start >= end) return path; StringBuilder buf = new StringBuilder(path); int delStart = -1; int delEnd = -1; int skip = 0; while (end > 0) { switch (end - start) { case 2: // possible single dot if (buf.charAt(start + 1) != '.') { if (skip > 0 && --skip == 0) { delStart = start >= 0 ? start : 0; if (delStart > 0 && delEnd == buf.length() && buf.charAt(delEnd - 1) == '.') delStart++; } break; } if (start < 0 && buf.length() > 2 && buf.charAt(1) == '/' && buf.charAt(2) == '/') break; if (delEnd < 0) delEnd = end; delStart = start; if (delStart < 0 || delStart == 0 && buf.charAt(delStart) == '/') { delStart++; if (delEnd < buf.length() && buf.charAt(delEnd) == '/') delEnd++; break; } if (end == buf.length()) delStart++; end = start--; while (start >= 0 && buf.charAt(start) != '/') start--; continue; case 3: // possible double dot if (buf.charAt(start + 1) != '.' || buf.charAt(start + 2) != '.') { if (skip > 0 && --skip == 0) { delStart = start >= 0 ? start : 0; if (delStart > 0 && delEnd == buf.length() && buf.charAt(delEnd - 1) == '.') delStart++; } break; } delStart = start; if (delEnd < 0) delEnd = end; skip++; end = start--; while (start >= 0 && buf.charAt(start) != '/') start--; continue; default: if (skip > 0 && --skip == 0) { delStart = start >= 0 ? start : 0; if (delEnd == buf.length() && buf.charAt(delEnd - 1) == '.') delStart++; } } // Do the delete if (skip <= 0 && delStart >= 0 && delEnd >= delStart) { buf.delete(delStart, delEnd); delStart = delEnd = -1; if (skip > 0) delEnd = end; } end = start--; while (start >= 0 && buf.charAt(start) != '/') start--; } // Too many .. if (skip > 0) return null; // Do the delete if (delEnd >= 0) buf.delete(delStart, delEnd); return buf.toString(); } /** * Convert a path to a compact form. All instances of "//" and "///" etc. * are factored out to single "/" * * @param path * the path to compact * @return the compacted path */ public static String compactPath(String path) { if (path == null || path.length() == 0) return path; int state = 0; int end = path.length(); int i = 0; loop: while (i < end) { char c = path.charAt(i); switch (c) { case '?': return path; case '/': state++; if (state == 2) break loop; break; default: state = 0; } i++; } if (state < 2) return path; StringBuffer buf = new StringBuffer(path.length()); buf.append(path, 0, i); loop2: while (i < end) { char c = path.charAt(i); switch (c) { case '?': buf.append(path, i, end); break loop2; case '/': if (state++ == 0) buf.append(c); break; default: state = 0; buf.append(c); } i++; } return buf.toString(); } /** * @param uri * URI * @return True if the uri has a scheme */ public static boolean hasScheme(String uri) { for (int i = 0; i < uri.length(); i++) { char c = uri.charAt(i); if (c == ':') return true; if (!(c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || (i > 0 && (c >= '0' && c <= '9' || c == '.' || c == '+' || c == '-')))) break; } return false; } /** * Create a new URI from the arguments, handling IPv6 host encoding and * default ports * * @param scheme * the URI scheme * @param server * the URI server * @param port * the URI port * @param path * the URI path * @param query * the URI query * @return A String URI */ public static String newURI(String scheme, String server, int port, String path, String query) { StringBuilder builder = newURIBuilder(scheme, server, port); builder.append(path); if (query != null && query.length() > 0) builder.append('?').append(query); return builder.toString(); } /** * Create a new URI StringBuilder from the arguments, handling IPv6 host * encoding and default ports * * @param scheme * the URI scheme * @param server * the URI server * @param port * the URI port * @return a StringBuilder containing URI prefix */ public static StringBuilder newURIBuilder(String scheme, String server, int port) { StringBuilder builder = new StringBuilder(); appendSchemeHostPort(builder, scheme, server, port); return builder; } /** * Append scheme, host and port URI prefix, handling IPv6 address encoding * and default ports * * @param url * StringBuilder to append to * @param scheme * the URI scheme * @param server * the URI server * @param port * the URI port */ public static void appendSchemeHostPort(StringBuilder url, String scheme, String server, int port) { if (server.indexOf(':') >= 0 && server.charAt(0) != '[') url.append(scheme).append("://").append('[').append(server).append(']'); else url.append(scheme).append("://").append(server); if (port > 0) { switch (scheme) { case "http": if (port != 80) url.append(':').append(port); break; case "https": if (port != 443) url.append(':').append(port); break; default: url.append(':').append(port); } } } /** * Append scheme, host and port URI prefix, handling IPv6 address encoding * and default ports * * @param url * StringBuffer to append to * @param scheme * the URI scheme * @param server * the URI server * @param port * the URI port */ public static void appendSchemeHostPort(StringBuffer url, String scheme, String server, int port) { synchronized (url) { if (server.indexOf(':') >= 0 && server.charAt(0) != '[') url.append(scheme).append("://").append('[').append(server).append(']'); else url.append(scheme).append("://").append(server); if (port > 0) { switch (scheme) { case "http": if (port != 80) url.append(':').append(port); break; case "https": if (port != 443) url.append(':').append(port); break; default: url.append(':').append(port); } } } } public static boolean equalsIgnoreEncodings(String uriA, String uriB) { int lenA = uriA.length(); int lenB = uriB.length(); int a = 0; int b = 0; while (a < lenA && b < lenB) { int oa = uriA.charAt(a++); int ca = oa; if (ca == '%') ca = TypeUtils.convertHexDigit(uriA.charAt(a++)) * 16 + TypeUtils.convertHexDigit(uriA.charAt(a++)); int ob = uriB.charAt(b++); int cb = ob; if (cb == '%') cb = TypeUtils.convertHexDigit(uriB.charAt(b++)) * 16 + TypeUtils.convertHexDigit(uriB.charAt(b++)); if (ca == '/' && oa != ob) return false; if (ca != cb) return URIUtils.decodePath(uriA).equals(URIUtils.decodePath(uriB)); } return a == lenA && b == lenB; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy