org.eclipse.jetty.util.URIUtil Maven / Gradle / Ivy
Show all versions of jetty-util Show documentation
//
// ========================================================================
// Copyright (c) 1995 Mort Bay Consulting Pty Ltd and others.
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License v. 2.0 which is available at
// https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
// which is available at https://www.apache.org/licenses/LICENSE-2.0.
//
// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
// ========================================================================
//
package org.eclipse.jetty.util;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.StringTokenizer;
import java.util.function.Supplier;
import java.util.stream.Stream;
import org.eclipse.jetty.util.resource.ResourceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* URI Utility methods.
*
* This class assists with the decoding and encoding of HTTP URIs.
* It differs from the java.net.URL class as it does not provide
* communications ability, but it does assist with query string
* formatting.
*
*
* @see UrlEncoded
*/
public final class URIUtil
{
// Logger for this utility class.
private static final Logger LOG = LoggerFactory.getLogger(URIUtil.class);
/**
* Port number indicating that the port is undefined.
*
* This is the same value as used in {@link java.net.URL} and {@link java.net.URI} classes.
*
*/
public static final int UNDEFINED_PORT = -1;
// Character classes from https://www.rfc-editor.org/rfc/rfc3986
// "unreserved": ALPHA / DIGIT / "-" / "." / "_" / "~"
private static final String UNRESERVED = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~";
// "sub-delims"
private static final String SUBDELIMS = "!$&'()*+,;=";
// Literal characters accepted in a reg-name; percent-encoded octets are checked separately
// by isValidHostRegisteredName.
private static final String REGNAME = UNRESERVED + SUBDELIMS;
// Allowed characters in https://www.rfc-editor.org/rfc/rfc3986 reg-name,
// as a lookup table indexed by 7-bit character code.
private static final boolean[] REGNAME_ALLOWED;
static
{
REGNAME_ALLOWED = new boolean[128];
// A freshly allocated boolean[] is already all false; the fill only makes that default explicit.
Arrays.fill(REGNAME_ALLOWED, false);
for (char c : REGNAME.toCharArray())
{
REGNAME_ALLOWED[c] = true;
}
}
/**
* The characters that are supported by the URI class and that can be decoded by {@link #canonicalPath(String)}.
* The table is indexed by 7-bit character code (0x00 to 0x7f); an entry is {@code true} when the
* character may be left decoded, and {@code false} when it must stay percent-encoded.
*/
private static final boolean[] URI_SUPPORTED_CHARACTERS = new boolean[]
{
false, // 0x00 is illegal
false, // 0x01 is illegal
false, // 0x02 is illegal
false, // 0x03 is illegal
false, // 0x04 is illegal
false, // 0x05 is illegal
false, // 0x06 is illegal
false, // 0x07 is illegal
false, // 0x08 is illegal
false, // 0x09 is illegal
false, // 0x0a is illegal
false, // 0x0b is illegal
false, // 0x0c is illegal
false, // 0x0d is illegal
false, // 0x0e is illegal
false, // 0x0f is illegal
false, // 0x10 is illegal
false, // 0x11 is illegal
false, // 0x12 is illegal
false, // 0x13 is illegal
false, // 0x14 is illegal
false, // 0x15 is illegal
false, // 0x16 is illegal
false, // 0x17 is illegal
false, // 0x18 is illegal
false, // 0x19 is illegal
false, // 0x1a is illegal
false, // 0x1b is illegal
false, // 0x1c is illegal
false, // 0x1d is illegal
false, // 0x1e is illegal
false, // 0x1f is illegal
false, // 0x20 space is illegal
true, // 0x21 !
false, // 0x22 " is illegal
false, // 0x23 # is special
true, // 0x24 $
false, // 0x25 % must remain encoded
true, // 0x26 &
true, // 0x27 '
true, // 0x28 (
true, // 0x29 )
true, // 0x2a *
true, // 0x2b +
true, // 0x2c ,
true, // 0x2d -
true, // 0x2e .
false, // 0x2f / is a delimiter
true, // 0x30 0
true, // 0x31 1
true, // 0x32 2
true, // 0x33 3
true, // 0x34 4
true, // 0x35 5
true, // 0x36 6
true, // 0x37 7
true, // 0x38 8
true, // 0x39 9
true, // 0x3a :
false, // 0x3b ; is path parameter
false, // 0x3c < is illegal
true, // 0x3d =
false, // 0x3e > is illegal
false, // 0x3f ? is special
true, // 0x40 @
true, // 0x41 A
true, // 0x42 B
true, // 0x43 C
true, // 0x44 D
true, // 0x45 E
true, // 0x46 F
true, // 0x47 G
true, // 0x48 H
true, // 0x49 I
true, // 0x4a J
true, // 0x4b K
true, // 0x4c L
true, // 0x4d M
true, // 0x4e N
true, // 0x4f O
true, // 0x50 P
true, // 0x51 Q
true, // 0x52 R
true, // 0x53 S
true, // 0x54 T
true, // 0x55 U
true, // 0x56 V
true, // 0x57 W
true, // 0x58 X
true, // 0x59 Y
true, // 0x5a Z
false, // 0x5b [ is illegal
false, // 0x5c \ is illegal
false, // 0x5d ] is illegal
false, // 0x5e ^ is illegal
true, // 0x5f _
false, // 0x60 ` is illegal
true, // 0x61 a
true, // 0x62 b
true, // 0x63 c
true, // 0x64 d
true, // 0x65 e
true, // 0x66 f
true, // 0x67 g
true, // 0x68 h
true, // 0x69 i
true, // 0x6a j
true, // 0x6b k
true, // 0x6c l
true, // 0x6d m
true, // 0x6e n
true, // 0x6f o
true, // 0x70 p
true, // 0x71 q
true, // 0x72 r
true, // 0x73 s
true, // 0x74 t
true, // 0x75 u
true, // 0x76 v
true, // 0x77 w
true, // 0x78 x
true, // 0x79 y
true, // 0x7a z
false, // 0x7b { is illegal
false, // 0x7c | is illegal
false, // 0x7d } is illegal
true, // 0x7e ~
false, // 0x7f DEL is illegal
};
// Lookup table indexed by 7-bit character code: true when encodePath must percent-encode the character.
private static final boolean[] ENCODE_PATH_NEEDS_ENCODING;
// Utility class with only static members: not instantiable.
private URIUtil()
{
}
static
{
ENCODE_PATH_NEEDS_ENCODING = new boolean[128];
// Special characters
for (char c: "%?;#\"'<> [\\]^`{|}".toCharArray())
ENCODE_PATH_NEEDS_ENCODING[c] = true;
// control characters
ENCODE_PATH_NEEDS_ENCODING[0x7f] = true;
for (int i = 0; i < 0x20; i++)
ENCODE_PATH_NEEDS_ENCODING[i] = true;
}
/**
 * Percent-encode a URI path.
 * Characters flagged in {@code ENCODE_PATH_NEEDS_ENCODING} are encoded; a path containing any
 * character above 0x7F is encoded octet by octet from its UTF-8 form. The '{@code /}'
 * character is not encoded.
 *
 * @param path The path to encode
 * @return The encoded path, or the given path itself when it is empty or nothing needs encoding
 */
public static String encodePath(String path)
{
    if (StringUtil.isEmpty(path))
        return path;

    boolean sawReserved = false;
    for (int idx = 0, n = path.length(); idx < n; idx++)
    {
        char ch = path.charAt(idx);
        // A single non-ASCII character forces the byte-wise (UTF-8) encoder for the whole path.
        if (ch > 0x7F)
            return encodePathBytes(path);
        sawReserved |= ENCODE_PATH_NEEDS_ENCODING[ch];
    }

    // Pure ASCII: char-by-char encoding is enough, and only if something was flagged.
    return sawReserved ? encodePathString(path) : path;
}
// Encodes an all-ASCII path char by char: flagged characters become %HH, the rest are copied.
private static String encodePathString(String path)
{
    StringBuilder encoded = new StringBuilder(path.length() * 2);
    for (char ch : path.toCharArray())
    {
        if (!ENCODE_PATH_NEEDS_ENCODING[ch])
        {
            encoded.append(ch);
            continue;
        }
        encoded.append('%');
        TypeUtil.toHex((byte)ch, encoded);
    }
    return encoded.toString();
}
// Encodes a path from its UTF-8 octets: non-ASCII octets and flagged ASCII characters become %HH.
private static String encodePathBytes(String path)
{
    byte[] utf8 = path.getBytes(StandardCharsets.UTF_8);
    StringBuilder encoded = new StringBuilder(path.length() * 2);
    for (int k = 0; k < utf8.length; k++)
    {
        byte octet = utf8[k];
        // Negative bytes are the octets of multi-byte UTF-8 sequences.
        boolean literal = octet >= 0 && !ENCODE_PATH_NEEDS_ENCODING[octet];
        if (literal)
        {
            encoded.append((char)octet);
        }
        else
        {
            encoded.append('%');
            TypeUtil.toHex(octet, encoded);
        }
    }
    return encoded.toString();
}
/**
 * Percent-encode only the listed characters of a raw String, leaving everything else untouched.
 * Each matched character is written as '{@code %}' followed by two uppercase hex digits taken
 * from the low 8 bits of the character.
 *
 * @param str input raw string
 * @param charsToEncode the list of raw characters that need to be encoded (if encountered)
 * @return output with specified characters encoded, {@code null} if {@code str} is null or empty,
 * or {@code str} itself if {@code charsToEncode} is null or empty
 */
@SuppressWarnings("Duplicates")
public static String encodeSpecific(String str, String charsToEncode)
{
    if (str == null || str.isEmpty())
        return null;
    if (charsToEncode == null || charsToEncode.isEmpty())
        return str;

    final String hexDigits = "0123456789ABCDEF";
    int n = str.length();
    StringBuilder out = new StringBuilder((int)(n * 0.20d));
    for (int pos = 0; pos < n; pos++)
    {
        char ch = str.charAt(pos);
        if (charsToEncode.indexOf(ch) < 0)
        {
            out.append(ch);
            continue;
        }
        out.append('%');
        out.append(hexDigits.charAt((ch >> 4) & 0xF));
        out.append(hexDigits.charAt(ch & 0xF));
    }
    return out.toString();
}
/**
 * Decode only those percent-encoded sequences of a raw String whose decoded value is one of the
 * listed characters. Every other encoded sequence is left exactly as it was.
 *
 * @param str input raw string
 * @param charsToDecode the list of raw characters that need to be decoded (if encountered), leaving all the other encoded sequences alone.
 * @return output with specified characters decoded, {@code null} if {@code str} is null or empty,
 * or {@code str} itself if {@code charsToDecode} is null or empty or there is no '{@code %}' in the input
 * @throws IllegalArgumentException if a '{@code %}' is not followed by two more characters
 */
@SuppressWarnings("Duplicates")
public static String decodeSpecific(String str, String charsToDecode)
{
    if (str == null || str.isEmpty())
        return null;
    if (charsToDecode == null || charsToDecode.isEmpty())
        return str;

    int firstPercent = str.indexOf('%');
    if (firstPercent == -1)
        return str; // nothing encoded at all

    int n = str.length();
    Utf8StringBuilder out = new Utf8StringBuilder(n);
    out.append(str, 0, firstPercent);

    int pos = firstPercent;
    while (pos < n)
    {
        char ch = str.charAt(pos);
        if (ch != '%')
        {
            out.append(ch);
            pos++;
            continue;
        }

        if ((pos + 2) >= n)
            throw new IllegalArgumentException("Bad URI % encoding");

        char upper = str.charAt(pos + 1);
        char lower = str.charAt(pos + 2);
        char candidate = (char)(0xff & (TypeUtil.convertHexDigit(upper) * 16 + TypeUtil.convertHexDigit(lower)));
        if (charsToDecode.indexOf(candidate) >= 0)
        {
            // Requested character: emit it decoded and skip the whole %HH triplet.
            out.append(candidate);
            pos += 3;
        }
        else
        {
            // Not requested: keep the '%' and let the two hex digits be copied as ordinary characters.
            out.append(ch);
            pos++;
        }
    }
    return out.toCompleteString();
}
/**
 * Percent-encode selected characters of a path into a StringBuilder.
 *
 * @param buf StringBuilder to encode path into (or null, in which case one is allocated only if needed)
 * @param path The path to encode
 * @param encode String of characters to encode. '{@code %}' is always encoded.
 * @return The StringBuilder, or null if {@code buf} was null and no substitutions were required.
 */
// TODO: remove, only used in URIUtilTest?
public static StringBuilder encodeString(StringBuilder buf,
                                         String path,
                                         String encode)
{
    if (buf == null)
    {
        // Without a caller-supplied builder, first find out whether anything needs encoding.
        boolean substitutionNeeded = false;
        for (int k = 0; k < path.length() && !substitutionNeeded; k++)
        {
            char probe = path.charAt(k);
            substitutionNeeded = probe == '%' || encode.indexOf(probe) >= 0;
        }
        if (!substitutionNeeded)
            return null;
        buf = new StringBuilder(path.length() << 1);
    }

    for (int k = 0; k < path.length(); k++)
    {
        char ch = path.charAt(k);
        boolean literal = ch != '%' && encode.indexOf(ch) < 0;
        if (literal)
        {
            buf.append(ch);
        }
        else
        {
            buf.append('%');
            StringUtil.append(buf, (byte)(0xff & ch), 16);
        }
    }
    return buf;
}
/**
* Decodes a percent-encoded URI path (assuming UTF-8 characters) and strips path parameters.
* Delegates to {@link #decodePath(String, int, int)} over the whole string.
* @param path The URI path to decode; its length is read unconditionally, so it must not be null
* @return the decoded path with path parameters removed
* @see #canonicalPath(String)
* @see #normalizePath(String)
*/
public static String decodePath(String path)
{
return decodePath(path, 0, path.length());
}
/**
* Decodes a percent-encoded URI path (assuming UTF-8 characters) and strips path parameters.
* Both {@code %HH} and {@code %uHHHH} forms are decoded; a path parameter (from '{@code ;}' up to, but not
* including, the next '{@code /}') is dropped.
* @param path A String holding the URI path to decode
* @param offset The start of the URI within the path string
* @param length The length of the URI within the path string
* @return the decoded path; when nothing needed decoding this is {@code path} itself (if the whole string
* was selected) or the selected substring
* @throws IllegalArgumentException if a '{@code %}' is not followed by at least two characters, or if any
* other failure occurs while decoding (the original exception is attached as the cause)
* @see #canonicalPath(String)
* @see #normalizePath(String)
*/
public static String decodePath(String path, int offset, int length)
{
try
{
// Allocated lazily: stays null until the first '%' or ';' is met, so clean paths are not copied.
Utf8StringBuilder builder = null;
int end = offset + length;
for (int i = offset; i < end; i++)
{
char c = path.charAt(i);
switch (c)
{
case '%':
if (builder == null)
{
builder = new Utf8StringBuilder(length);
// copy everything seen so far, which needed no decoding
builder.append(path, offset, i - offset);
}
if ((i + 2) < end)
{
char u = path.charAt(i + 1);
if (u == 'u')
{
// UTF16 encoding is only supported with UriCompliance.Violation.UTF16_ENCODINGS.
// %uHHHH: parse the 4 hex digits as a code point and feed its UTF-8 bytes to the builder.
int[] codePoints = {(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16))};
String str = new String(codePoints, 0, 1);
byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
for (byte b: bytes)
builder.append(b);
// skip 'u' and the 4 hex digits; the loop increment skips the '%'
i += 5;
}
else
{
// %HH: a single octet of the UTF-8 sequence
byte b = (byte)(0xff & (TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(path.charAt(i + 2))));
builder.append(b);
i += 2;
}
}
else
{
throw new IllegalArgumentException("Bad URI % encoding");
}
break;
case ';':
if (builder == null)
{
builder = new Utf8StringBuilder(path.length());
builder.append(path, offset, i - offset);
}
// Drop the path parameter: skip up to the next '/', which is kept.
while (++i < end)
{
if (path.charAt(i) == '/')
{
builder.append('/');
break;
}
}
break;
default:
// Only copy once a builder exists; otherwise the original string is still usable as-is.
if (builder != null)
builder.append(c);
break;
}
}
if (builder != null)
{
return builder.toCompleteString();
}
// Nothing was decoded or stripped: avoid a copy when the whole string was selected.
if (offset == 0 && length == path.length())
return path;
return path.substring(offset, end);
}
catch (IllegalArgumentException e)
{
throw e;
}
catch (Exception e)
{
// e.g. index or parsing failures on malformed input
throw new IllegalArgumentException("cannot decode URI", e);
}
}
/**
 * Check a path for characters that may not appear in it.
 * Only '{@code ?}' and '{@code #}' are treated as invalid.
 *
 * @param path The path to check for validity
 * @return True if the path is null or does not contain any invalid path characters
 */
public static boolean isPathValid(String path)
{
    return path == null || path.chars().noneMatch(ch -> ch == '?' || ch == '#');
}
/**
 * Test if a string is a relative path or URI.
 * An empty string is relative. A string is not relative if it starts with '{@code /}', with the
 * platform file separator, or with a URI scheme.
 *
 * @param uriOrPath A string that is either a path, a URI path segment or an absolute URI
 * @return True if the string does not start with any absolute URI or file characters sequences.
 */
public static boolean isRelative(String uriOrPath)
{
    if (uriOrPath.isEmpty())
        return true;

    char first = uriOrPath.charAt(0);
    boolean rooted = first == '/' || (File.separatorChar != '/' && first == File.separatorChar);
    return !rooted && !URIUtil.hasScheme(uriOrPath);
}
/**
* Test if codepoint is safe and unambiguous to pass as input to {@link URI}
*
* @param code the codepoint code to test
* @return true if safe to decode, otherwise false;
*/
private static boolean isSafe(int code)
{
// Any code beyond the 7-bit table (0x80 and above) is treated as safe;
// otherwise the code is safe only if it is flagged true in URI_SUPPORTED_CHARACTERS.
return (code >= URI_SUPPORTED_CHARACTERS.length || URI_SUPPORTED_CHARACTERS[code]);
}
/**
 * Report whether a codepoint is safe; when it is not, append its percent-encoded form to the builder.
 * Nothing is appended for a safe codepoint: the caller is expected to append it decoded.
 *
 * @param code the codepoint to check
 * @param builder The builder to encode into
 * @return true if the decoded value is safe to pass as input to {@link URI}, otherwise false;
 */
private static boolean isSafeElseEncode(int code, Utf8StringBuilder builder)
{
    boolean safe = isSafe(code);
    if (!safe)
        encodeCodepoint(code, builder);
    return safe;
}
// Appends the percent-encoded form of a codepoint: one %HH for 7-bit values,
// otherwise one %HH per octet of the codepoint's UTF-8 encoding.
private static void encodeCodepoint(int code, Utf8StringBuilder builder)
{
    if (code <= 0x7F)
    {
        builder.append('%');
        appendHexValue(builder, (byte)code);
        return;
    }

    // Beyond 7 bits: go through a String to obtain the UTF-8 octets of the codepoint.
    String text = new String(new int[]{code}, 0, 1);
    for (byte octet : text.getBytes(StandardCharsets.UTF_8))
    {
        builder.append('%');
        appendHexValue(builder, octet);
    }
}
// Appends the two uppercase hex digits of a byte, high nibble first.
private static void appendHexValue(Utf8StringBuilder builder, byte value)
{
    final String hexDigits = "0123456789ABCDEF";
    int high = (value >> 4) & 0x0F;
    int low = value & 0x0F;
    builder.append(hexDigits.charAt(high));
    builder.append(hexDigits.charAt(low));
}
/**
* Canonicalize a URI path to a form that is unambiguous and safe to use with the JVM {@link URI} class.
*
* Decode only the safe characters in a URI path and strip parameters of UTF-8 path.
* Safe characters are ones that are not special delimiters and that can be passed to the JVM {@link URI} class.
* Unsafe characters, other than '{@code /}' will be encoded. Encodings will be uppercase hex.
* Canonical paths are also normalized and may be used in string comparisons with other canonical paths.
*
* For example the path {@code /fo %2fo/b%61r} will be normalized to {@code /fo%20%2Fo/bar},
* whilst {@link #decodePath(String)} would result in the ambiguous and URI illegal {@code /fo /o/bar}.
*
* This overload passes a null exception supplier to the two-argument form.
* @param encodedPath An encoded URI path
* @return the canonical path or null if it is non-normal
* @see #decodePath(String)
* @see #normalizePath(String)
* @see URI
*/
public static String canonicalPath(String encodedPath)
{
return canonicalPath(encodedPath, null);
}
/**
 * Canonicalize a URI path to a form that is unambiguous and safe to use with the JVM {@link URI} class.
 *
 * Decode only the safe characters in a URI path and strip parameters of UTF-8 path.
 * Safe characters are ones that are not special delimiters and that can be passed to the JVM {@link URI} class.
 * Unsafe characters, other than '{@code /}' will be encoded. Encodings will be uppercase hex.
 * Canonical paths are also normalized and may be used in string comparisons with other canonical paths.
 *
 * For example the path {@code /fo %2fo/b%61r} will be normalized to {@code /fo%20%2Fo/bar},
 * whilst {@link #decodePath(String)} would result in the ambiguous and URI illegal {@code /fo /o/bar}.
 * @param encodedPath An encoded URI path
 * @param onBadUtf8 A supplier of exceptions if bad UTF8 is encountered, or null for no exception thrown.
 * @param <X> the type of exception supplied by {@code onBadUtf8}
 * @return the canonical path or null if it is non-normal
 * @throws X as supplied by {@code onBadUtf8} when the decoded octets are not valid UTF-8
 * @throws IllegalArgumentException if a '{@code %}' is not followed by a complete {@code HH} or {@code uHHHH} sequence
 * @see #decodePath(String)
 * @see #normalizePath(String)
 * @see URI
 */
public static <X extends Throwable> String canonicalPath(String encodedPath, Supplier<X> onBadUtf8) throws X
{
    if (encodedPath == null)
        return null;
    // Allocated lazily: stays null while the input can be returned unchanged.
    Utf8StringBuilder builder = null;
    int end = encodedPath.length();
    // True when the previous character was a '/' (or at the very start): a '.' here starts a dot segment.
    boolean slash = true;
    // Cleared as soon as a segment starting with '.' is seen, which forces a normalizePath pass at the end.
    boolean normal = true;
    for (int i = 0; i < end; i++)
    {
        char c = encodedPath.charAt(i);
        switch (c)
        {
            case '%':
                if (builder == null)
                {
                    builder = new Utf8StringBuilder(encodedPath.length());
                    builder.append(encodedPath, 0, i);
                }
                if ((i + 2) < end)
                {
                    char u = encodedPath.charAt(i + 1);
                    if (u == 'u')
                    {
                        // %uHHHH needs characters up to index i + 5; a truncated sequence is reported
                        // the same way as a truncated %HH rather than as an index failure.
                        if ((i + 5) >= end)
                            throw new IllegalArgumentException("Bad URI % encoding");
                        // UTF16 encoding is only supported with UriCompliance.Violation.UTF16_ENCODINGS.
                        int code = TypeUtil.parseInt(encodedPath, i + 2, 4, 16);
                        if (isSafeElseEncode(code, builder))
                        {
                            char[] chars = Character.toChars(code);
                            for (char ch : chars)
                            {
                                builder.append(ch);
                                if (slash && ch == '.')
                                    normal = false;
                                slash = false;
                            }
                        }
                        i += 5;
                    }
                    else
                    {
                        int code = TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(encodedPath.charAt(i + 2));
                        if (isSafeElseEncode(code, builder))
                        {
                            builder.append((byte)(0xff & code));
                            // an encoded '.' (%2e) at the start of a segment is still a dot segment
                            if (slash && code == '.')
                                normal = false;
                        }
                        i += 2;
                    }
                }
                else
                {
                    throw new IllegalArgumentException("Bad URI % encoding");
                }
                break;
            case ';':
                if (builder == null)
                {
                    builder = new Utf8StringBuilder(encodedPath.length());
                    builder.append(encodedPath, 0, i);
                }
                // Drop the path parameter: skip up to the next '/', which is kept.
                // NOTE(review): 'slash' is derived from c (';') below, so a dot segment directly after
                // a stripped parameter (e.g. "/a;p/../b") does not clear 'normal' - confirm this is intended.
                while (++i < end)
                {
                    if (encodedPath.charAt(i) == '/')
                    {
                        builder.append('/');
                        break;
                    }
                }
                break;
            case '/':
                if (builder != null)
                    builder.append(c);
                break;
            case '.':
                if (slash)
                    normal = false;
                if (builder != null)
                    builder.append(c);
                break;
            default:
                // A literal unsafe character forces a builder so that it can be written encoded.
                if (builder == null && !isSafe(c))
                {
                    builder = new Utf8StringBuilder(encodedPath.length());
                    builder.append(encodedPath, 0, i);
                }
                if (builder != null && isSafeElseEncode(c, builder))
                    builder.append(c);
                break;
        }
        slash = c == '/';
    }
    String canonical = (builder != null)
        ? (onBadUtf8 == null ? builder.toCompleteString() : builder.takeCompleteString(onBadUtf8))
        : encodedPath;
    return normal ? canonical : normalizePath(canonical);
}
/**
 * Add two encoded URI path segments.
 * Handles null and empty paths, path and query params
 * (e.g. {@code ?a=b} or {@code ;JSESSIONID=xxx}) and avoids duplicate '{@code /}'
 *
 * The second segment is inserted at the end of the path portion of {@code p1}, that is before the
 * first '{@code ;}' or '{@code ?}', whichever comes first. Taking the earlier of the two keeps
 * a '{@code ;}' that appears inside the query from being mistaken for the start of path parameters.
 *
 * @param p1 URI path segment (should be encoded)
 * @param p2 URI path segment (should be encoded)
 * @return Legally combined path segments.
 */
public static String addEncodedPaths(String p1, String p2)
{
    if (p1 == null || p1.isEmpty())
    {
        // an empty (non-null) p1 wins over a null p2
        if (p1 != null && p2 == null)
            return p1;
        return p2;
    }
    if (p2 == null || p2.isEmpty())
        return p1;

    // End of the path portion of p1: the first ';' or '?', whichever is earlier.
    int split = p1.indexOf(';');
    int query = p1.indexOf('?');
    if (split < 0 || (query >= 0 && query < split))
        split = query;
    // p1 is only params/query: the whole of p2 goes in front of it
    if (split == 0)
        return p2 + p1;
    if (split < 0)
        split = p1.length();

    boolean p1EndsWithSlash = p1.charAt(split - 1) == '/';
    boolean p2StartsWithSlash = p2.startsWith("/");

    StringBuilder buf = new StringBuilder(p1.length() + p2.length() + 2);
    if (p1EndsWithSlash && p2StartsWithSlash)
    {
        // drop the duplicate '/'
        buf.append(p1, 0, split - 1);
    }
    else
    {
        buf.append(p1, 0, split);
        if (!p1EndsWithSlash && !p2StartsWithSlash)
            buf.append('/');
    }
    // then p2, followed by whatever params/query p1 carried
    buf.append(p2).append(p1, split, p1.length());
    return buf.toString();
}
/**
 * Add two decoded URI path segments, joining them with exactly one '{@code /}'.
 *
 * Handles null and empty paths.
 * Path and query params (e.g. {@code ?a=b} or {@code ;JSESSIONID=xxx}) are not handled
 *
 * @param p1 URI path segment (should be decoded)
 * @param p2 URI path segment (should be decoded)
 * @return Legally combined path segments.
 */
public static String addPaths(String p1, String p2)
{
    if (p1 == null || p1.isEmpty())
        return (p1 != null && p2 == null) ? p1 : p2;
    if (p2 == null || p2.isEmpty())
        return p1;

    boolean trailingSlash = p1.endsWith("/");
    boolean leadingSlash = p2.startsWith("/");

    if (trailingSlash && leadingSlash)
    {
        // one side is just "/": the other side is already the answer
        if (p2.length() == 1)
            return p1;
        if (p1.length() == 1)
            return p2;
        // otherwise drop the duplicate '/'
        return p1.substring(0, p1.length() - 1) + p2;
    }

    if (trailingSlash || leadingSlash)
        return p1 + p2;
    return p1 + "/" + p2;
}
/**
 * Append a query string to a path.
 *
 * @param path The path which may already contain a query
 * @param query The query string to add (if blank, no query is added)
 * @return The path with any non-blank query added after a '{@code ?}' or '{@code &}' as appropriate.
 */
public static String addPathQuery(String path, String query)
{
    if (StringUtil.isBlank(query))
        return path;
    // '&' extends an existing query, '?' starts a new one
    char separator = path.indexOf('?') >= 0 ? '&' : '?';
    return path + separator + query;
}
/**
 * Given a URI, attempt to get the last segment of its scheme-specific part.
 *
 * If this is a {@code jar:file://} style URI, then
 * the JAR filename is returned (not the deep {@code !/path} location).
 * A single trailing '{@code /}' is ignored.
 *
 * @param uri the URI to look in
 * @return the last segment.
 */
public static String getUriLastPathSegment(URI uri)
{
    String part = uri.getSchemeSpecificPart();

    // The segment of interest ends at a deep "!/" reference, if there is one.
    int bang = part.indexOf("!/");
    int stop = bang >= 0 ? bang : part.length();

    // Ignore one trailing '/'.
    if (stop > 0 && part.charAt(stop - 1) == '/')
        stop--;

    // Everything after the last remaining '/' (or from the start when there is none).
    int start = part.lastIndexOf('/', stop - 1) + 1;
    return part.substring(start, stop);
}
/**
 * Return the parent path.
 *
 * Treats a URI like a directory path and returns the parent directory, including its trailing
 * '{@code /}'. A trailing '{@code /}' on the input itself is ignored when looking for the parent.
 *
 * @param p the path to return a parent reference to
 * @return the parent path of the URI, or null if {@code p} is null, is {@code "/"} or has no parent
 */
public static String parentPath(String p)
{
    if (p == null || "/".equals(p))
        return null;
    // start one before the last character so that a trailing '/' is not taken as the separator
    int separator = p.lastIndexOf('/', p.length() - 2);
    return separator < 0 ? null : p.substring(0, separator + 1);
}
/**
* Normalize a URI path and query by factoring out all segments of '{@code .}' and '{@code ..}'
* up until any query or fragment.
* Null is returned if the path is normalized above its root.
* Everything from the first '{@code ?}' or '{@code #}' onwards is copied through unchanged.
*
* @param pathQuery the encoded URI from the path onwards, which may contain query strings and/or fragments
* @return the normalized path, or null if path traversal above root.
* @see #normalizePath(String)
*/
public static String normalizePathQuery(String pathQuery)
{
if (pathQuery == null || pathQuery.isEmpty())
return pathQuery;
// True when the previous character was a '/' (or at the very start).
boolean slash = true;
int end = pathQuery.length();
int i = 0;
// Initially just loop looking if we may need to normalize
loop: while (i < end)
{
char c = pathQuery.charAt(i);
switch (c)
{
case '/':
slash = true;
break;
case '.':
// a '.' at the start of a segment: leave the scan with i on that dot
if (slash)
break loop;
break;
case '?':
case '#':
// Nothing to normalize so return original path
return pathQuery;
default:
slash = false;
}
i++;
}
// Nothing to normalize so return original path
if (i == end)
return pathQuery;
// We probably need to normalize, so copy to path so far into builder
StringBuilder canonical = new StringBuilder(pathQuery.length());
canonical.append(pathQuery, 0, i);
// Loop looking for single and double dot segments
// The dot that ended the first scan is counted here and then skipped.
int dots = 1;
i++;
loop : while (i < end)
{
char c = pathQuery.charAt(i);
switch (c)
{
case '/':
// end of a segment: resolve any pending dots; true means traversal above the root
if (doDotsSlash(canonical, dots))
return null;
slash = true;
dots = 0;
break;
case '?':
case '#':
// finish normalization at a query
break loop;
case '.':
// Count dots only if they are leading in the segment
if (dots > 0)
dots++;
else if (slash)
dots = 1;
else
canonical.append('.');
slash = false;
break;
default:
// Add leading dots to the path
while (dots-- > 0)
canonical.append('.');
canonical.append(c);
dots = 0;
slash = false;
}
i++;
}
// process any remaining dots
if (doDots(canonical, dots))
return null;
// append any query
if (i < end)
canonical.append(pathQuery, i, end);
return canonical.toString();
}
/**
* Check if a path would be normalized above its own root. For example,
* {@code /foo/../../bar} is normalized above its root and would
* thus return true, whilst {@code /foo/./bar/..} is normal within itself
* and would return false.
* @param path The path to check
* @return True if {@link #normalizePath(String)} returns null for the path, i.e. its normal form
* is NOT within the root of the path.
*/
public static boolean isNotNormalWithinSelf(String path)
{
// TODO this can be optimized to avoid allocation.
return normalizePath(path) == null;
}
/**
* Normalize a URI path by factoring out all segments of {@code .} and {@code ..}.
* Null is returned if the path is normalized above its root.
*
* @param path the decoded URI path to convert. Any special characters (e.g. {@code ?}, {@code #}) are assumed to be part of
* the path segments.
* @return the normalized path, or null if path traversal above root.
* @see #normalizePathQuery(String)
* @see #canonicalPath(String)
* @see #decodePath(String)
*/
public static String normalizePath(String path)
{
if (path == null || path.isEmpty())
return path;
// True when the previous character was a '/' (or at the very start).
boolean slash = true;
int end = path.length();
int i = 0;
// Initially just loop looking if we may need to normalize
loop: while (i < end)
{
char c = path.charAt(i);
switch (c)
{
case '/' -> slash = true;
case '.' ->
{
// a '.' at the start of a segment: leave the scan with i on that dot
if (slash)
break loop;
}
default -> slash = false;
}
i++;
}
// Nothing to normalize so return original path
if (i == end)
return path;
// We probably need to normalize, so copy to path so far into builder
StringBuilder canonical = new StringBuilder(path.length());
canonical.append(path, 0, i);
// Loop looking for single and double dot segments
// The dot that ended the first scan is counted here and then skipped.
int dots = 1;
i++;
while (i < end)
{
char c = path.charAt(i);
switch (c)
{
case '/' ->
{
// end of a segment: resolve any pending dots; true means traversal above the root
if (doDotsSlash(canonical, dots))
return null;
slash = true;
dots = 0;
}
case '.' ->
{
// Count dots only if they are leading in the segment
if (dots > 0)
dots++;
else if (slash)
dots = 1;
else
canonical.append('.');
slash = false;
}
default ->
{
// Add leading dots to the path
while (dots-- > 0)
canonical.append('.');
canonical.append(c);
dots = 0;
slash = false;
}
}
i++;
}
// process any remaining dots
if (doDots(canonical, dots))
return null;
return canonical.toString();
}
// Resolves the leading dots of the final segment (no trailing '/').
// 0 or 1 dot: nothing to do; 2 dots: remove the previous segment; 3+ dots: kept as a literal name.
// Returns true only when ".." would climb above the root.
private static boolean doDots(StringBuilder canonical, int dots)
{
    if (dots == 2)
    {
        if (canonical.length() < 2)
            return true;
        // drop the trailing '/', then cut back to just after the previous '/'
        canonical.setLength(canonical.length() - 1);
        canonical.setLength(canonical.lastIndexOf("/") + 1);
    }
    else if (dots > 2)
    {
        for (int remaining = dots; remaining > 0; remaining--)
            canonical.append('.');
    }
    return false;
}
// Resolves the leading dots of a segment that is terminated by '/'.
// 1 dot: the segment vanishes; 2 dots: remove the previous segment;
// otherwise the dots (if any) are kept as a literal name and the '/' is appended.
// Returns true only when ".." would climb above the root.
private static boolean doDotsSlash(StringBuilder canonical, int dots)
{
    if (dots == 1)
        return false;

    if (dots == 2)
    {
        if (canonical.length() < 2)
            return true;
        // drop the trailing '/', then cut back to just after the previous '/'
        canonical.setLength(canonical.length() - 1);
        canonical.setLength(canonical.lastIndexOf("/") + 1);
        return false;
    }

    for (int remaining = dots; remaining > 0; remaining--)
        canonical.append('.');
    canonical.append('/');
    return false;
}
/**
 * Convert a path to a compact form.
 * All instances of {@code //} and {@code ///} etc. are factored out to single {@code /}.
 * Compaction stops at the first '{@code ?}'; the rest of the string is copied unchanged.
 *
 * @param path the path to compact
 * @return the compacted path, or the given path itself when there is nothing to compact
 */
public static String compactPath(String path)
{
    if (path == null || path.isEmpty())
        return path;

    int length = path.length();

    // Pass 1: find the second slash of the first run of slashes, giving up at a '?'.
    int duplicateAt = -1;
    int run = 0;
    for (int i = 0; i < length && duplicateAt < 0; i++)
    {
        char c = path.charAt(i);
        if (c == '?')
            return path;
        if (c == '/')
        {
            if (++run == 2)
                duplicateAt = i;
        }
        else
        {
            run = 0;
        }
    }
    if (duplicateAt < 0)
        return path;

    // Pass 2: copy the clean prefix, then keep only the first slash of every run.
    StringBuilder compact = new StringBuilder(length);
    compact.append(path, 0, duplicateAt);
    boolean afterSlash = true; // the prefix ends with the first slash of the run
    for (int i = duplicateAt; i < length; i++)
    {
        char c = path.charAt(i);
        if (c == '?')
        {
            compact.append(path, i, length);
            break;
        }
        if (c == '/')
        {
            if (!afterSlash)
                compact.append(c);
            afterSlash = true;
        }
        else
        {
            afterSlash = false;
            compact.append(c);
        }
    }
    return compact.toString();
}
/**
 * Test whether a string starts with a URI scheme, i.e. a run of scheme characters ended by '{@code :}'.
 * Letters are accepted anywhere; digits, '{@code .}', '{@code +}' and '{@code -}' only after the first
 * character. Note that a '{@code :}' in the very first position is also reported as a scheme.
 *
 * @param uri URI
 * @return True if the uri has a scheme
 */
public static boolean hasScheme(String uri)
{
    for (int i = 0, n = uri.length(); i < n; i++)
    {
        char c = uri.charAt(i);
        if (c == ':')
            return true;

        boolean letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        boolean trailingOnly = i > 0 && ((c >= '0' && c <= '9') || c == '.' || c == '+' || c == '-');
        // any other character before a ':' means there is no scheme
        if (!letter && !trailingOnly)
            return false;
    }
    return false;
}
/**
 * True if token is a RFC3986 {@code reg-name} (Registered Name)
 *
 * @param token the token to test
 * @return true if the token passes as a valid Host Registered Name (a null token is considered valid)
 */
public static boolean isValidHostRegisteredName(String token)
{
    /* reg-name ABNF is defined as :
     *   reg-name    = *( unreserved / pct-encoded / sub-delims )
     *   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
     *   pct-encoded = "%" HEXDIG HEXDIG
     *   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
     *               / "*" / "+" / "," / ";" / "="
     */
    if (token == null)
        return true; // null token is considered valid

    int length = token.length();
    int pos = 0;
    while (pos < length)
    {
        char ch = token.charAt(pos);
        if (ch > 127)
            return false;
        if (REGNAME_ALLOWED[ch])
        {
            pos++;
            continue;
        }
        // the only other thing allowed is a pct-encoded triplet
        if (ch != '%' || !StringUtil.isHex(token, pos + 1, 2))
            return false;
        pos += 3;
    }
    return true;
}
/**
* Create a new URI from the arguments, handling IPv6 host encoding and default ports.
* Equivalent to {@link #newURI(String, String, int, String, String)} with a null path and a null query.
*
* @param scheme the URI scheme
* @param server the URI server
* @param port the URI port
* @return A String URI
*/
public static String newURI(String scheme, String server, int port)
{
return newURI(scheme, server, port, null, null);
}
/**
* Create a new URI from the arguments, handling IPv6 host encoding and default ports.
* Equivalent to {@link #newURI(String, String, int, String, String, String)} with a null fragment.
*
* @param scheme the URI scheme
* @param server the URI server
* @param port the URI port
* @param path the URI path
* @param query the URI query
* @return A String URI
*/
public static String newURI(String scheme, String server, int port, String path, String query)
{
return newURI(scheme, server, port, path, query, null);
}
/**
 * Create a new URI from the arguments, handling IPv6 host encoding and default ports.
 * A blank path is replaced by '{@code /}' when a query or fragment follows, and omitted otherwise.
 *
 * @param scheme the URI scheme
 * @param server the URI server
 * @param port the URI port
 * @param path the URI path
 * @param query the URI query (null for none; an empty query is still appended after '{@code ?}')
 * @param fragment the URI fragment (null for none; an empty fragment is still appended after '{@code #}')
 * @return A String URI
 */
public static String newURI(String scheme, String server, int port, String path, String query, String fragment)
{
    StringBuilder uri = newURIBuilder(scheme, server, port);

    // Only null means "absent": empty query/fragment have meaning,
    // which also matches the behavior of java URL & URI.
    if (StringUtil.isNotBlank(path))
        uri.append(path);
    else if (query != null || fragment != null)
        uri.append('/');

    if (query != null)
        uri.append('?').append(query);
    if (fragment != null)
        uri.append('#').append(fragment);
    return uri.toString();
}
/**
* Create a new URI StringBuilder from the arguments, handling IPv6 host encoding and default ports.
* The prefix is produced by {@link #appendSchemeHostPort(StringBuilder, String, String, int)}.
*
* @param scheme the URI scheme
* @param server the URI server
* @param port the URI port
* @return a StringBuilder containing URI prefix
*/
public static StringBuilder newURIBuilder(String scheme, String server, int port)
{
// initial capacity of 128 chars, so the caller can usually append path/query without a resize
StringBuilder builder = new StringBuilder(128);
appendSchemeHostPort(builder, scheme, server, port);
return builder;
}
/**
 * Append the {@code scheme://host[:port]} URI prefix, handling IPv6 address encoding and default ports.
 * The port is appended only when, after normalization for the scheme, it is greater than zero.
 *
 * @param url StringBuilder to append to
 * @param scheme the URI scheme
 * @param server the URI server
 * @param port the URI port
 */
public static void appendSchemeHostPort(StringBuilder url, String scheme, String server, int port)
{
    String normalizedScheme = normalizeScheme(scheme);
    url.append(normalizedScheme);
    url.append("://");
    url.append(HostPort.normalizeHost(server));

    int effectivePort = normalizePortForScheme(normalizedScheme, port);
    if (effectivePort > 0)
    {
        url.append(':');
        url.append(effectivePort);
    }
}
/**
 * Append the {@code scheme://host[:port]} URI prefix, handling IPv6 address encoding and default ports.
 * The port is appended only when, after normalization for the scheme, it is greater than zero.
 *
 * @param url StringBuffer to append to
 * @param scheme the URI scheme
 * @param server the URI server
 * @param port the URI port
 * @deprecated Use {@link #appendSchemeHostPort(StringBuilder, String, String, int)}
 */
@Deprecated
public static void appendSchemeHostPort(StringBuffer url, String scheme, String server, int port)
{
    String normalizedScheme = normalizeScheme(scheme);
    url.append(normalizedScheme);
    url.append("://");
    url.append(HostPort.normalizeHost(server));

    int effectivePort = normalizePortForScheme(normalizedScheme, port);
    if (effectivePort > 0)
    {
        url.append(':');
        url.append(effectivePort);
    }
}
/**
 * Encode characters in a path to ensure they only contain safe encodings suitable for both
 * {@link URI} and {@link Paths#get(URI)} usage.
 * <p>
 * The path is scanned one character at a time:
 * </p>
 * <ul>
 * <li>a valid {@code %uXXXX} sequence is parsed to a 16-bit codepoint and re-emitted through {@code encodeCodepoint}</li>
 * <li>a valid {@code %XX} sequence is kept encoded when the byte {@link #mustBeEncoded(int)} or is {@code 0x2F}
 * ({@code '/'}), otherwise the decoded byte is appended</li>
 * <li>a {@code '%'} that does not start a valid or complete sequence is replaced by {@code %25}</li>
 * <li>any other character for which {@link #mustBeEncoded(int)} is true is emitted through {@code encodeCodepoint}</li>
 * </ul>
 * <p>
 * If nothing needed changing, the original {@code path} instance is returned.
 * </p>
 *
 * @param path the path to encode
 * @return the returned path with only safe encodings, or {@code null} if {@code path} was {@code null}
 * @throws IllegalArgumentException if the path cannot be processed
 */
public static String encodePathSafeEncoding(String path)
{
if (path == null)
return null;
// empty and root paths need no work
if ("".equals(path) || "/".equals(path))
return path;
// the whole string is scanned: offset is always 0 and length is the full path length
int offset = 0;
int length = path.length();
try
{
// allocated lazily, on the first character that requires a change
Utf8StringBuilder builder = null;
int end = offset + length;
for (int i = offset; i < end; i++)
{
char c = path.charAt(i);
if (c == '%')
{
if (builder == null)
{
// first change: start the builder with the untouched characters before position i
builder = new Utf8StringBuilder(path.length());
builder.append(path, offset, i - offset);
}
// at least two more characters must follow the '%'
if ((i + 2) < end)
{
char u = path.charAt(i + 1);
if (u == 'u')
{
if (TypeUtil.isHex(path, i + 2, 4))
{
// Always decode percent-u encoding to UTF-8
int codepoint = (0xffff & TypeUtil.parseInt(path, i + 2, 4, 16));
encodeCodepoint(codepoint, builder);
// skip the 'u' and the 4 hex digits (the loop increment skips the '%')
i += 5;
}
else
{
// not valid percent-u, encode the percent symbol
// (i is not advanced, so the following characters are handled by later iterations)
builder.append("%25");
}
}
else
{
if (TypeUtil.isHex(path, i + 1, 2))
{
// valid Hex, attempt to decode it
byte b = (byte)(0xff & (TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(path.charAt(i + 2))));
// NOTE: b is a signed byte, so values 0x80-0xFF are negative here;
// mustBeEncoded reports true for them through its "<= 0x1F" check.
// 0x2F is '/', which is kept encoded so that it is not turned into a path separator.
if (mustBeEncoded(b) || b == 0x2F)
{
// unsafe, keep encoding
encodeCodepoint(b, builder);
}
else
{
// safe to decode
builder.append(b);
}
// skip the 2 hex digits (the loop increment skips the '%')
i += 2;
}
else
{
// not valid percent encoding, encode the percent symbol
builder.append("%25");
}
}
}
else
{
// incomplete percent encoding, encode the percent symbol
builder.append("%25");
}
}
else
{
if (mustBeEncoded(c))
{
if (builder == null)
{
// first change: start the builder with the untouched characters before position i
builder = new Utf8StringBuilder(path.length());
builder.append(path, offset, i - offset);
}
encodeCodepoint(c, builder);
}
else
{
// safe character: only copied once a builder exists
if (builder != null)
builder.append(c);
}
}
}
if (builder != null)
return builder.toCompleteString();
// nothing was changed, return the input as-is
return path;
}
catch (IllegalArgumentException e)
{
// already the documented exception type, pass through unchanged
throw e;
}
catch (Exception e)
{
// any other failure is reported as an IllegalArgumentException with the cause preserved
throw new IllegalArgumentException("cannot decode URI", e);
}
}
/**
 * Decide whether a codepoint has to be percent-encoded in a URI path.
 * <p>
 * This does not allow 8-bit characters, unlike {@link #isSafe(int)}
 * </p>
 *
 * @param codepoint the codepoint to check
 * @return true if the codepoint must be encoded, false otherwise
 */
private static boolean mustBeEncoded(int codepoint)
{
    // control characters (0x1F and below), DEL (0x7F) and everything 8-bit or above
    if (codepoint <= 0x1F || codepoint >= 0x7F)
        return true;

    switch (codepoint)
    {
        // unsafe characters
        case '"':
        case '<':
        case '>':
        case '%':
        case '{':
        case '}':
        case '|':
        case '\\':
        case '^':
        case '`':
        // additional raw characters rejected by java.net.URI
        case ' ':
        case '[':
        case ']':
        // additional raw characters rejected by Paths.get(URI)
        case '?':
        case '#':
            return true;
        default:
            return false;
    }
}
/**
 * Append a sub path to an existing URI.
 * <p>
 * The path is compacted with {@code compactPath}, made safe with
 * {@link #encodePathSafeEncoding(String)}, and joined to the (corrected) base URI
 * with exactly one {@code '/'} between the two.
 * </p>
 *
 * @param uri A URI to add the path to
 * @param path A safe path element
 * @return URI with path added, or the input URI if there is no path to add.
 * @see #addPaths(String, String)
 */
public static URI addPath(URI uri, String path)
{
    Objects.requireNonNull(uri, "URI");
    if (path == null || path.isEmpty())
        return uri;

    // collapse any "//" paths in the path portion
    String compacted = compactPath(path);
    if (compacted.isEmpty())
        return uri;

    // Correct any bad `file:/path` usages, and force encoding of characters
    // that must be encoded (such as unicode) for the base
    String base = correctURI(uri).toASCIIString();

    // ensure that the path has a safe encoding suitable for both
    // URI and Paths.get(URI) later usage
    String safePath = encodePathSafeEncoding(compacted);
    int safeLen = safePath.length();

    if (base.isEmpty())
        return URI.create(safePath);

    StringBuilder joined = new StringBuilder(base.length() + safeLen * 3);
    joined.append(base);
    if (!base.endsWith("/"))
        joined.append('/');

    // the separator is already present, so drop a leading '/' from the path
    if (safePath.charAt(0) == '/')
        joined.append(safePath, 1, safeLen);
    else
        joined.append(safePath);

    return URI.create(joined.toString());
}
/**
 * Join two query strings into a single one.
 * <p>
 * Neither query string should contain the leading '{@code ?}' character, but each
 * may contain multiple parameters separated by the '{@code &}' character.
 * When one of them is empty the other is returned unchanged.
 * </p>
 *
 * @param query1 the first query string.
 * @param query2 the second query string.
 * @return the combination of the two query strings.
 */
public static String addQueries(String query1, String query2)
{
    if (StringUtil.isEmpty(query1))
        return query2;
    return StringUtil.isEmpty(query2) ? query1 : query1 + "&" + query2;
}
/**
 * <p>
 * Corrects any bad {@code file} based URIs (even within {@code jar:file:} based URIs) from the bad out-of-spec
 * format that various older Java APIs create (most notably: {@link java.io.File} with its {@link File#toURL()}
 * and {@link File#toURI()}, along with the side effects of using {@link URL#toURI()})
 * </p>
 *
 * <p>
 * This correction is limited to only the {@code file:/} substring in the URI.
 * If there is a {@code file:/} detected, that substring is corrected to
 * {@code file:///}, all other uses of {@code file:}, and URIs without a {@code file:}
 * substring are left alone.
 * </p>
 *
 * <p>
 * Note that Windows UNC based URIs are left alone, along with non-absolute URIs.
 * </p>
 *
 * <p>
 * This method only delegates to {@link #correctURI(URI)}.
 * </p>
 *
 * @param uri the URI to (possibly) correct
 * @return the new URI with the {@code file:/} substring corrected, or the original URI.
 * @deprecated use {@link #correctURI(URI)} instead, will be removed in Jetty 12.1.0
 */
@Deprecated(since = "12.0.7", forRemoval = true)
public static URI correctFileURI(URI uri)
{
return correctURI(uri);
}
/**
 * <p>
 * Corrects any bad {@code file} based URIs (even within {@code jar:file:} based URIs) from the bad out-of-spec
 * format that various older Java APIs create (most notably: {@link java.io.File} with its {@link File#toURL()}
 * and {@link File#toURI()}, along with the side effects of using {@link URL#toURI()})
 * </p>
 *
 * <p>
 * This correction is currently limited to only the {@code file:/} substring in the URI.
 * If there is a {@code file:/} detected, that substring is corrected to
 * {@code file:///}, all other uses of {@code file:}, and URIs without a {@code file:}
 * substring are left alone. A bare {@code file:/} is corrected to {@code file:///}.
 * </p>
 *
 * <p>
 * Note that Windows UNC based URIs are left alone, along with non-absolute URIs.
 * </p>
 *
 * @param uri the URI to (possibly) correct, may be {@code null}
 * @return the new URI with the {@code file:} scheme specific part corrected, or the original URI.
 */
public static URI correctURI(URI uri)
{
    // a URI without a scheme is not absolute, and cannot be fixed
    if ((uri == null) || (uri.getScheme() == null))
        return uri;

    String scheme = uri.getScheme();
    if (!scheme.equalsIgnoreCase("file") && !scheme.equalsIgnoreCase("jar"))
        return uri; // not a scheme we can fix

    if (uri.getRawAuthority() != null)
        return uri; // already valid (used in Windows UNC uris)

    String rawURI = uri.toASCIIString();
    int colon = rawURI.indexOf(":/");
    if (colon < 0)
        return uri; // path portion not found

    // Index of the character that follows ":/". For an input such as "file:/"
    // this is one past the end of the string, so it must be bounds checked
    // before being read.
    int afterSlash = colon + 2;
    if ((afterSlash < rawURI.length()) && (rawURI.charAt(afterSlash) == '/'))
        return uri; // already in "://" form, nothing to correct

    return URI.create(rawURI.substring(0, colon) + ":///" + rawURI.substring(afterSlash));
}
/**
 * Split a string of references, that may be split with '{@code ,}', or '{@code ;}', or '{@code |}' into URIs.
 * <p>
 * Each part of the input string could be path references (unix or windows style), or string URI references.
 * </p>
 * <p>
 * A reference ending in {@code /*} or {@code \*} is treated as a directory glob: the regular files of that
 * directory which are lib archives (per {@code FileID.isLibArchive}) are added in natural sort order.
 * A glob whose directory does not exist contributes nothing.
 * </p>
 * <p>
 * If the result of processing the input segment is a java archive, then its resulting URI will be a mountable URI as {@code jar:file:...!/}
 * </p>
 *
 * @param str the input string of references
 * @return the list of URIs found, in the order of the references
 * @see #toJarFileUri(URI)
 * @deprecated use {@link ResourceFactory#split(String)}
 */
@Deprecated(since = "12.0.8", forRemoval = true)
public static List<URI> split(String str)
{
    List<URI> uris = new ArrayList<>();
    StringTokenizer tokenizer = new StringTokenizer(str, ",;|");
    while (tokenizer.hasMoreTokens())
    {
        String reference = tokenizer.nextToken();
        try
        {
            // Is this a glob reference?
            if (reference.endsWith("/*") || reference.endsWith("\\*"))
            {
                String dir = reference.substring(0, reference.length() - 2);
                Path pathDir = Paths.get(dir);
                // Use directory (isDirectory is false for a path that does not exist)
                if (Files.isDirectory(pathDir))
                {
                    // To obtain the list of entries; the stream holds an open
                    // directory handle and must be closed
                    try (Stream<Path> listStream = Files.list(pathDir))
                    {
                        listStream
                            .filter(Files::isRegularFile)
                            .filter(FileID::isLibArchive)
                            .sorted(Comparator.naturalOrder())
                            .forEach(path -> uris.add(toJarFileUri(path.toUri())));
                    }
                    catch (IOException e)
                    {
                        throw new RuntimeException("Unable to process directory glob listing: " + reference, e);
                    }
                }
            }
            else
            {
                // Simple reference
                URI refUri = toURI(reference);
                // Ensure that a Java Archive that can be mounted
                uris.add(toJarFileUri(refUri));
            }
        }
        catch (Exception e)
        {
            // report which reference was at fault, then let the caller see the failure
            LOG.warn("Invalid Resource Reference: {}", reference);
            throw e;
        }
    }
    return uris;
}
/**
 * <p>
 * Turn a {@code file:} or {@code jar:file:} URI into a URI that is suitable for mounting as a Java FileSystem.
 * </p>
 * <p>
 * A {@code file:} URI that ends with {@code /} is considered a directory and is returned unchanged.
 * Any other {@code file:} URI is prefixed with {@code jar:}. If the URI has no {@code !/} internal
 * reference yet, {@code !/} (the root of the archive) is appended.
 * </p>
 *
 * @param uri the URI to mutate to a {@code jar:file:} URI.
 * @return the {@code jar:${uri_to_java_archive}!/${internal-reference}} URI, or the unchanged URI
 * if it is a directory or is already in that form.
 * @throws NullPointerException if the URI or its scheme is null
 * @throws IllegalArgumentException if the URI is neither a {@code file:} nor a {@code jar:file:} URI
 * @see FileID#isArchive(URI)
 */
public static URI toJarFileUri(URI uri)
{
    Objects.requireNonNull(uri, "URI");
    String scheme = Objects.requireNonNull(uri.getScheme(), "URI scheme");
    String ssp = uri.getRawSchemeSpecificPart();
    boolean deepReference = ssp.indexOf("!/") > 0;

    if (scheme.equalsIgnoreCase("file"))
    {
        String ascii = uri.toASCIIString();
        if (ascii.endsWith("/"))
            return uri; // skip directories
        String wrapped = "jar:" + ascii;
        return URI.create(deepReference ? wrapped : wrapped + "!/");
    }

    if (scheme.equalsIgnoreCase("jar") && ssp.startsWith("file:"))
    {
        // already a jar:file: URI, only the internal reference indicator may be missing
        return deepReference ? uri : URI.create(uri.toASCIIString() + "!/");
    }

    // any other scheme, or a jar: URI that does not wrap a file: URI
    throw new IllegalArgumentException(String.format("Cannot make %s into `jar:file:` URI", uri));
}
/**
 * Convert a String into a URI suitable for use as a Resource.
 * <p>
 * A string with a scheme is parsed as a {@link URI}; if that URI is accepted by
 * {@link ResourceFactory#isSupported(URI)} it is returned after {@link #correctURI(URI)}.
 * A single letter scheme is treated as a possible Windows drive letter and retried as a {@link Path}.
 * A string without a usable scheme is treated as a {@link Path}, and the URI of that path
 * is passed through this method again.
 * </p>
 *
 * @param resource If the string has a scheme supported by {@link ResourceFactory#isSupported(URI)}, then it is
 * assumed to be a representation of a {@link URI}, otherwise it is treated as a {@link Path}.
 * @return The {@link URI} form of the resource.
 * @throws NullPointerException if the resource is null
 * @throws IllegalArgumentException if the scheme is not registered, or the string can be used neither
 * as a URI nor as a file path
 * @deprecated This method is currently resolving relative paths against the current directory, which is a mechanism
 * that should be implemented by a {@link ResourceFactory}. All calls to this method need to be reviewed.
 */
@Deprecated(since = "12.0.8")
public static URI toURI(String resource)
{
Objects.requireNonNull(resource);
if (URIUtil.hasScheme(resource))
{
try
{
URI uri = new URI(resource);
if (ResourceFactory.isSupported(uri))
return correctURI(uri);
// We don't have a supported URI scheme
if (uri.getScheme().length() == 1)
{
// Input is a possible Windows path disguised as a URI "D:/path/to/resource.txt".
try
{
// convert via Path, then run the resulting URI string through this method again
return toURI(Paths.get(resource).toUri().toASCIIString());
}
catch (InvalidPathException x)
{
// not a usable path either, continue to the "not registered" failure below
LOG.trace("ignored", x);
}
}
// If we reached this point, that means the input String has a scheme,
// and is not recognized as supported by the registered schemes in ResourceFactory.
if (LOG.isDebugEnabled())
LOG.debug("URI scheme is not registered: {}", uri.toASCIIString());
throw new IllegalArgumentException("URI scheme not registered: " + uri.getScheme());
}
catch (URISyntaxException x)
{
// We have an input string that has what looks like a scheme, but isn't a URI.
// Eg: "C:\path\to\resource.txt"
// Execution continues with the Path handling below.
LOG.trace("ignored", x);
}
}
// If we reached this point, we have a String with no valid scheme.
// Treat it as a Path, as that's all we have left to investigate.
try
{
// convert via Path, then run the resulting URI string through this method again
return toURI(Paths.get(resource).toUri().toASCIIString());
}
catch (InvalidPathException x)
{
LOG.trace("ignored", x);
}
// If we reached this point, that means the input string cannot be used as
// a URI or a File Path. The cause is usually due to bad input (eg:
// characters that are not supported by file system)
if (LOG.isDebugEnabled())
LOG.debug("Input string cannot be converted to URI \"{}\"", resource);
throw new IllegalArgumentException("Cannot be converted to URI");
}
/**
 * <p>
 * Unwrap a URI to expose its container path reference.
 * </p>
 * <p>
 * For a {@code jar:file:${container-name}!/} URI this yields the {@code file:${container-name}} URI:
 * the {@code jar:} scheme is dropped, together with everything from the first {@code !/} onwards.
 * </p>
 *
 * @param uri the input URI
 * @return the container URI if the input has a {@code jar} scheme, or just the URI untouched.
 * @throws NullPointerException if the URI is null
 */
public static URI unwrapContainer(URI uri)
{
    Objects.requireNonNull(uri);

    // constant-first comparison also covers a URI without a scheme
    if (!"jar".equalsIgnoreCase(uri.getScheme()))
        return uri;

    String wrapped = uri.getRawSchemeSpecificPart();
    int bangSlash = wrapped.indexOf("!/");
    String container = (bangSlash == -1) ? wrapped : wrapped.substring(0, bangSlash);
    return URI.create(container);
}
/**
 * Take a URI and add a deep reference {@code jar:file://foo.jar!/suffix}, replacing
 * any existing deep reference on the input URI.
 *
 * @param uri the input URI (supporting {@code jar} or {@code file} based schemes)
 * @param encodedSuffix the suffix to set. Must start with {@code !/}. Must be properly URI encoded.
 * @return the {@code jar:file:} based URI with a deep reference
 * @throws IllegalArgumentException if an argument is null, the suffix does not start with {@code !/},
 * or the URI has no scheme or a scheme other than {@code jar} or {@code file}
 */
public static URI uriJarPrefix(URI uri, String encodedSuffix)
{
    if (uri == null)
        throw new IllegalArgumentException("URI must not be null");
    if (encodedSuffix == null)
        throw new IllegalArgumentException("Encoded Suffix must not be null");
    if (!encodedSuffix.startsWith("!/"))
        throw new IllegalArgumentException("Suffix must start with \"!/\"");

    String uriString = uri.toASCIIString(); // ensure proper encoding

    // drop any existing deep reference
    int bangSlash = uriString.indexOf("!/");
    if (bangSlash >= 0)
        uriString = uriString.substring(0, bangSlash);

    // The scheme is null for a relative URI. Comparing constant-first keeps that
    // case out of a NullPointerException and reports it as unsupported instead.
    String scheme = uri.getScheme();
    if ("jar".equalsIgnoreCase(scheme))
        return URI.create(uriString + encodedSuffix);
    if ("file".equalsIgnoreCase(scheme))
        return URI.create("jar:" + uriString + encodedSuffix);

    throw new IllegalArgumentException("Unsupported URI scheme: " + uri);
}
/**
 * Stream the {@link URLClassLoader#getURLs()} as URIs
 * <p>
 * Null entries are skipped. Each remaining URL is converted to a URI from its string form,
 * passed through {@link #unwrapContainer(URI)} and then through {@link #correctURI(URI)}.
 * </p>
 *
 * @param urlClassLoader the classloader to load from
 * @return the Stream of {@link URI}
 */
public static Stream<URI> streamOf(URLClassLoader urlClassLoader)
{
    URL[] urls = urlClassLoader.getURLs();
    return Stream.of(urls)
        .filter(Objects::nonNull)
        .map(URL::toString)
        .map(URI::create)
        .map(URIUtil::unwrapContainer)
        .map(URIUtil::correctURI);
}
// Case-insensitive lookup of the default port for some well known schemes.
// The value type is declared so that lookups yield an Integer rather than an Object.
private static final Index<Integer> DEFAULT_PORT_FOR_SCHEME = new Index.Builder<Integer>()
    .caseSensitive(false)
    .with("ftp", 21)
    .with("ssh", 22)
    .with("telnet", 23)
    .with("smtp", 25)
    .with("http", 80)
    .with("ws", 80)
    .with("https", 443)
    .with("wss", 443)
    .build();
/**
 * Look up the default port of a well known scheme.
 *
 * @param scheme The scheme, may be null
 * @return The default port or -1 if not known
 */
public static int getDefaultPortForScheme(String scheme)
{
    Integer knownPort = (scheme == null) ? null : DEFAULT_PORT_FOR_SCHEME.get(scheme);
    return (knownPort == null) ? -1 : knownPort;
}
/**
 * Normalize the scheme by converting it with {@code StringUtil.asciiToLowerCase}.
 *
 * @param scheme The scheme to normalize, may be null
 * @return The normalized version of the scheme, or null if the scheme was null
 */
public static String normalizeScheme(String scheme)
{
    if (scheme == null)
        return null;
    return StringUtil.asciiToLowerCase(scheme);
}
/**
 * Normalize a port for a given scheme
 *
 * @param scheme The scheme
 * @param port The port to normalize
 * @return The port number, or 0 if the provided port was not positive or was equal to the default port for the scheme
 */
public static int normalizePortForScheme(String scheme, int port)
{
    // the default port is only looked up for a positive port
    boolean explicitPort = (port > 0) && (port != getDefaultPortForScheme(scheme));
    return explicitPort ? port : 0;
}
}