io.micronaut.web.router.uri.UriUtil Maven / Gradle / Ivy
/*
* Copyright 2017-2025 original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.micronaut.web.router.uri;
import io.micronaut.core.annotation.NonNull;
import java.net.URI;
/**
* Utilities for converting URI formats.
*
* @author Jonas Konrad
* @since 4.9.0
*/
public final class UriUtil {

    private UriUtil() {
    }

    /**
     * Convert a path+query written according to the whatwg url spec into a path+query accepted
     * by RFC 3986. Whatwg tolerates some characters (e.g. '|') as well as malformed percent
     * escapes that neither RFC 3986 nor {@link URI} accept. Such input is percent-encoded here,
     * so that any URI sent by a browser can be handed to {@link URI}.
     *
     * <p>A {@code '%'} is copied verbatim only when it is followed by two ASCII hex digits;
     * otherwise the {@code '%'} itself is percent-encoded. Every other code point is passed
     * through {@code PercentEncoder.RFC3986_QUERY_CHAR}. Additional {@code '/'} characters
     * directly after a leading {@code '/'} are dropped.
     *
     * @param path The whatwg path+query
     * @return A valid RFC 3986 {@code relative-ref}
     */
    public static String toValidPath(String path) {
        StringBuilder out = new StringBuilder();
        final int length = path.length();
        int index = 0;
        while (index < length) {
            final int codePoint = path.codePointAt(index);
            if (codePoint == '%') {
                if (startsValidEscape(path, index)) {
                    // well-formed escape: keep the '%', the two hex digits follow as normal input
                    out.append('%');
                } else {
                    PercentEncoder.appendEncodedByte(out, (byte) '%');
                }
            } else if (!isRepeatedLeadingSlash(out, codePoint)) {
                PercentEncoder.RFC3986_QUERY_CHAR.encodeUtf8(out, codePoint);
            }
            // else: swallow the slash so that the result never starts with '//'
            index += Character.charCount(codePoint);
        }
        return out.toString();
    }

    /**
     * Test whether the {@code '%'} at the given index is followed by two ASCII hex digits.
     *
     * @param text         The input being scanned
     * @param percentIndex Index of the {@code '%'} character
     * @return {@code true} if both following characters exist and are hex digits
     */
    private static boolean startsValidEscape(String text, int percentIndex) {
        if (percentIndex + 2 >= text.length()) {
            return false;
        }
        return isAsciiHexDigit(text.charAt(percentIndex + 1))
            && isAsciiHexDigit(text.charAt(percentIndex + 2));
    }

    /**
     * Test whether appending the given code point would produce a second slash right after a
     * single leading slash.
     *
     * @param out       The output accumulated so far
     * @param codePoint The code point about to be appended
     * @return {@code true} if the code point is a {@code '/'} and the output is exactly "/"
     */
    private static boolean isRepeatedLeadingSlash(StringBuilder out, int codePoint) {
        return codePoint == '/' && out.length() == 1 && out.charAt(0) == '/';
    }

    /**
     * Check whether the given HTTP request target is a valid RFC 3986 relative URI (path + query)
     * that {@link URI} will parse without complaint. When this holds, the expensive parsing can
     * be postponed until it is actually needed.
     *
     * <p>The check is deliberately strict: any {@code '%'}, any non-ASCII character, any
     * character rejected by {@code PercentEncoder.RFC3986_QUERY_CHAR}, an empty segment
     * ({@code "//"}) or a segment that looks like {@code "."} / {@code ".."} makes it fail.
     *
     * @param requestTarget The HTTP request target
     * @return {@code true} iff this is a valid relative URI
     */
    public static boolean isValidPath(@NonNull String requestTarget) {
        final int length = requestTarget.length();
        if (length == 0 || requestTarget.charAt(0) != '/') {
            return false;
        }
        for (int pos = 0; pos < length; pos++) {
            final char current = requestTarget.charAt(pos);
            final boolean literal = current != '%'
                && current <= 0x7f
                && PercentEncoder.RFC3986_QUERY_CHAR.keep((byte) current);
            if (!literal) {
                return false;
            }
            if (current == '/' && pos + 1 < length && !isPlainSegmentStart(requestTarget, pos + 1)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Inspect the characters directly after a {@code '/'} and decide whether they are acceptable
     * for the fast path of {@link #isValidPath(String)}.
     *
     * @param text  The request target
     * @param start Index of the first character after the slash; must be inside {@code text}
     * @return {@code false} for another {@code '/'}, for a trailing {@code '.'}, or for a
     * {@code '.'} followed by {@code '.'}, {@code '/'}, {@code '?'} or {@code '#'};
     * {@code true} otherwise
     */
    private static boolean isPlainSegmentStart(String text, int start) {
        final char first = text.charAt(start);
        if (first == '/') {
            return false;
        }
        if (first != '.') {
            return true;
        }
        if (start + 1 >= text.length()) {
            // the target ends in "/."
            return false;
        }
        final char second = text.charAt(start + 1);
        return !(second == '.' || second == '/' || second == '?' || second == '#');
    }

    /**
     * Determine whether the given HTTP request target is a relative URI (path+query) suitable
     * for {@link #toValidPath(String)}. The invariants are:
     *
     * <ul>
     *     <li>This method returns {@code true} exactly when, according to the whatwg URL spec,
     *     this URL has no scheme</li>
     *     <li>If the input is a valid URI, this method is equal to the inverse of
     *     {@link URI#isAbsolute()}</li>
     *     <li>If this method returns {@code true}, and the input is a valid URI after going
     *     through {@link #toValidPath(String)}, {@link URI#isAbsolute()} is {@code false}</li>
     * </ul>
     *
     * @param requestTarget The HTTP request target
     * @return {@code true} if this URL is relative
     */
    public static boolean isRelative(@NonNull String requestTarget) {
        // The scan looks odd on purpose; a fuzz test checks it against the whatwg spec.
        final int length = requestTarget.length();
        int pos = 0;

        // Phase 1: c0 controls and space are trimmed at the start (this covers tab and newlines,
        // which are ignored anywhere).
        while (pos < length && isC0OrSpace(requestTarget.charAt(pos))) {
            pos++;
        }
        // A scheme has to begin with an ASCII letter; anything else means there is no scheme.
        if (pos == length || !isAsciiAlpha(requestTarget.charAt(pos))) {
            return true;
        }

        // Phase 2: consume the rest of a potential scheme up to the ':'.
        for (pos++; pos < length; pos++) {
            final char c = requestTarget.charAt(pos);
            if (c == ':') {
                return false;
            }
            final boolean skippable = c == '\t' || c == '\n' || c == '\r';
            if (!skippable && !isSchemeTailChar(c)) {
                // Not a scheme character: either trailing c0/space or an invalid scheme, in both
                // cases no scheme was found.
                return true;
            }
        }
        return true;
    }

    /**
     * @param c The character to test
     * @return {@code true} for characters allowed after the first letter of a scheme: ASCII
     * letters, ASCII digits, {@code '+'}, {@code '-'} and {@code '.'}
     */
    private static boolean isSchemeTailChar(int c) {
        return isAsciiAlpha(c) || isAsciiDigit(c) || c == '+' || c == '-' || c == '.';
    }

    /**
     * @param c The character to test
     * @return {@code true} if the character is a space or has a value of at most {@code 0x1f}
     */
    private static boolean isC0OrSpace(char c) {
        return c == ' ' || c <= 0x1f;
    }

    /**
     * @param c The character to test
     * @return {@code true} for {@code '0'} to {@code '9'}
     */
    private static boolean isAsciiDigit(int c) {
        return '0' <= c && c <= '9';
    }

    /**
     * @param c The character to test
     * @return {@code true} for ASCII digits and the letters a-f in either case
     */
    private static boolean isAsciiHexDigit(int c) {
        return isAsciiDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
    }

    /**
     * @param c The character to test
     * @return {@code true} for the letters a-z in either case
     */
    private static boolean isAsciiAlpha(int c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy