All downloads are FREE. Search and download functionality uses the official Maven repository.

com.aspectran.web.support.util.UriUtils Maven / Gradle / Ivy

There is a newer version: 8.1.5
Show newest version
/*
 * Copyright (c) 2008-2024 The Aspectran Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.aspectran.web.support.util;

import com.aspectran.utils.Assert;
import com.aspectran.utils.LinkedMultiValueMap;
import com.aspectran.utils.MultiValueMap;
import com.aspectran.utils.StringUtils;
import com.aspectran.utils.annotation.jsr305.NonNull;
import com.aspectran.utils.annotation.jsr305.Nullable;

import java.io.ByteArrayOutputStream;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * 

This class is a clone of org.springframework.web.util.UriUtils

* Utility methods for URI encoding and decoding based on RFC 3986. * *

There are two types of encode methods: *

    *
  • {@code "encodeXyz"} -- these encode a specific URI component (e.g. path, * query) by percent encoding illegal characters, which includes non-US-ASCII * characters, and also characters that are otherwise illegal within the given * URI component type, as defined in RFC 3986. The effect of this method, with * regards to encoding, is comparable to using the multi-argument constructor * of {@link URI}. *
  • {@code "encode"} and {@code "encodeUriVariables"} -- these can be used * to encode URI variable values by percent encoding all characters that are * either illegal, or have any reserved meaning, anywhere within a URI. *
* * @see RFC 3986 */ public abstract class UriUtils { /** * Encode the given URI scheme with the given encoding. * @param scheme the scheme to be encoded * @param encoding the character encoding to encode to * @return the encoded scheme */ public static String encodeScheme(String scheme, String encoding) { return encode(scheme, encoding, UriComponentsType.SCHEME); } /** * Encode the given URI scheme with the given encoding. * @param scheme the scheme to be encoded * @param charset the character encoding to encode to * @return the encoded scheme */ public static String encodeScheme(String scheme, Charset charset) { return encode(scheme, charset, UriComponentsType.SCHEME); } /** * Encode the given URI authority with the given encoding. * @param authority the authority to be encoded * @param encoding the character encoding to encode to * @return the encoded authority */ public static String encodeAuthority(String authority, String encoding) { return encode(authority, encoding, UriComponentsType.AUTHORITY); } /** * Encode the given URI authority with the given encoding. * @param authority the authority to be encoded * @param charset the character encoding to encode to * @return the encoded authority */ public static String encodeAuthority(String authority, Charset charset) { return encode(authority, charset, UriComponentsType.AUTHORITY); } /** * Encode the given URI user info with the given encoding. * @param userInfo the user info to be encoded * @param encoding the character encoding to encode to * @return the encoded user info */ public static String encodeUserInfo(String userInfo, String encoding) { return encode(userInfo, encoding, UriComponentsType.USER_INFO); } /** * Encode the given URI user info with the given encoding. 
* @param userInfo the user info to be encoded * @param charset the character encoding to encode to * @return the encoded user info */ public static String encodeUserInfo(String userInfo, Charset charset) { return encode(userInfo, charset, UriComponentsType.USER_INFO); } /** * Encode the given URI host with the given encoding. * @param host the host to be encoded * @param encoding the character encoding to encode to * @return the encoded host */ public static String encodeHost(String host, String encoding) { return encode(host, encoding, UriComponentsType.HOST_IPV4); } /** * Encode the given URI host with the given encoding. * @param host the host to be encoded * @param charset the character encoding to encode to * @return the encoded host */ public static String encodeHost(String host, Charset charset) { return encode(host, charset, UriComponentsType.HOST_IPV4); } /** * Encode the given URI port with the given encoding. * @param port the port to be encoded * @param encoding the character encoding to encode to * @return the encoded port */ public static String encodePort(String port, String encoding) { return encode(port, encoding, UriComponentsType.PORT); } /** * Encode the given URI port with the given encoding. * @param port the port to be encoded * @param charset the character encoding to encode to * @return the encoded port */ public static String encodePort(String port, Charset charset) { return encode(port, charset, UriComponentsType.PORT); } /** * Encode the given URI path with the given encoding. * @param path the path to be encoded * @param encoding the character encoding to encode to * @return the encoded path */ public static String encodePath(String path, String encoding) { return encode(path, encoding, UriComponentsType.PATH); } /** * Encode the given URI path with the given encoding. 
* @param path the path to be encoded * @param charset the character encoding to encode to * @return the encoded path */ public static String encodePath(String path, Charset charset) { return encode(path, charset, UriComponentsType.PATH); } /** * Encode the given URI path segment with the given encoding. * @param segment the segment to be encoded * @param encoding the character encoding to encode to * @return the encoded segment */ public static String encodePathSegment(String segment, String encoding) { return encode(segment, encoding, UriComponentsType.PATH_SEGMENT); } /** * Encode the given URI path segment with the given encoding. * @param segment the segment to be encoded * @param charset the character encoding to encode to * @return the encoded segment */ public static String encodePathSegment(String segment, Charset charset) { return encode(segment, charset, UriComponentsType.PATH_SEGMENT); } /** * Encode the given URI query with the given encoding. * @param query the query to be encoded * @param encoding the character encoding to encode to * @return the encoded query */ public static String encodeQuery(String query, String encoding) { return encode(query, encoding, UriComponentsType.QUERY); } /** * Encode the given URI query with the given encoding. * @param query the query to be encoded * @param charset the character encoding to encode to * @return the encoded query */ public static String encodeQuery(String query, Charset charset) { return encode(query, charset, UriComponentsType.QUERY); } /** * Encode the given URI query parameter with the given encoding. * @param queryParam the query parameter to be encoded * @param encoding the character encoding to encode to * @return the encoded query parameter */ public static String encodeQueryParam(String queryParam, String encoding) { return encode(queryParam, encoding, UriComponentsType.QUERY_PARAM); } /** * Encode the given URI query parameter with the given encoding. 
* @param queryParam the query parameter to be encoded * @param charset the character encoding to encode to * @return the encoded query parameter */ public static String encodeQueryParam(String queryParam, Charset charset) { return encode(queryParam, charset, UriComponentsType.QUERY_PARAM); } /** * Encode the query parameters from the given {@code MultiValueMap} with UTF-8. * @param params the parameters to encode * @return a new {@code MultiValueMap} with the encoded names and values */ @NonNull public static MultiValueMap encodeQueryParams(@NonNull MultiValueMap params) { Charset charset = StandardCharsets.UTF_8; MultiValueMap result = new LinkedMultiValueMap<>(params.size()); for (Map.Entry> entry : params.entrySet()) { for (String value : entry.getValue()) { result.add(encodeQueryParam(entry.getKey(), charset), encodeQueryParam(value, charset)); } } return result; } /** * Encode the given URI fragment with the given encoding. * @param fragment the fragment to be encoded * @param encoding the character encoding to encode to * @return the encoded fragment */ public static String encodeFragment(String fragment, String encoding) { return encode(fragment, encoding, UriComponentsType.FRAGMENT); } /** * Encode the given URI fragment with the given encoding. * @param fragment the fragment to be encoded * @param charset the character encoding to encode to * @return the encoded fragment */ public static String encodeFragment(String fragment, Charset charset) { return encode(fragment, charset, UriComponentsType.FRAGMENT); } /** * Variant of {@link #encode(String, Charset)} with a String charset. * @param source the String to be encoded * @param encoding the character encoding to encode to * @return the encoded String */ public static String encode(String source, String encoding) { return encode(source, encoding, UriComponentsType.URI); } /** * Encode all characters that are either illegal, or have any reserved * meaning, anywhere within a URI, as defined in * RFC 3986. 
* This is useful to ensure that the given String will be preserved as-is * and will not have any impact on the structure or meaning of the URI. * @param source the String to be encoded * @param charset the character encoding to encode to * @return the encoded String */ public static String encode(String source, Charset charset) { return encode(source, charset, UriComponentsType.URI); } /** * Convenience method to apply {@link #encode(String, Charset)} to all * given URI variable values. * @param uriVariables the URI variable values to be encoded * @return the encoded String */ @NonNull public static Map encodeUriVariables(@NonNull Map uriVariables) { Map result = new LinkedHashMap<>((int) Math.ceil(uriVariables.size() / (double)0.75f)); uriVariables.forEach((key, value) -> { String stringValue = (value != null ? value.toString() : ""); result.put(key, encode(stringValue, StandardCharsets.UTF_8)); }); return result; } /** * Convenience method to apply {@link #encode(String, Charset)} to all * given URI variable values. * @param uriVariables the URI variable values to be encoded * @return the encoded String */ @NonNull public static Object[] encodeUriVariables(Object... uriVariables) { return Arrays.stream(uriVariables) .map(value -> { String stringValue = (value != null ? value.toString() : ""); return encode(stringValue, StandardCharsets.UTF_8); }) .toArray(); } private static String encode(String scheme, String encoding, UriComponentsType type) { return encodeUriComponent(scheme, encoding, type); } private static String encode(String scheme, Charset charset, UriComponentsType type) { return encodeUriComponent(scheme, charset, type); } /** * Encode the given source into an encoded String using the rules specified * by the given component and with the given options. 
* @param source the source String * @param encoding the encoding of the source String * @param type the URI component for the source * @return the encoded URI * @throws IllegalArgumentException when the given value is not a valid URI component */ private static String encodeUriComponent(String source, String encoding, UriComponentsType type) { return encodeUriComponent(source, Charset.forName(encoding), type); } /** * Encode the given source into an encoded String using the rules specified * by the given component and with the given options. * @param source the source String * @param charset the encoding of the source String * @param type the URI component for the source * @return the encoded URI * @throws IllegalArgumentException when the given value is not a valid URI component */ private static String encodeUriComponent(String source, Charset charset, UriComponentsType type) { if (!StringUtils.hasLength(source)) { return source; } Assert.notNull(charset, "Charset must not be null"); Assert.notNull(type, "Type must not be null"); byte[] bytes = source.getBytes(charset); boolean original = true; for (byte b : bytes) { if (!type.isAllowed(b)) { original = false; break; } } if (original) { return source; } ByteArrayOutputStream baos = new ByteArrayOutputStream(bytes.length); for (byte b : bytes) { if (type.isAllowed(b)) { baos.write(b); } else { baos.write('%'); char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); baos.write(hex1); baos.write(hex2); } } return baos.toString(charset); } /** * Decode the given encoded URI component. 
* @param source the encoded String * @param encoding the character encoding to use * @return the decoded value * @throws IllegalArgumentException when the given source contains invalid encoded sequences * @see java.net.URLDecoder#decode(String, String) */ public static String decode(String source, String encoding) { return decode(source, Charset.forName(encoding)); } /** * Decode the given encoded URI component value. Based on the following rules: *
    *
  • Alphanumeric characters {@code "a"} through {@code "z"}, {@code "A"} through {@code "Z"}, * and {@code "0"} through {@code "9"} stay the same.
  • *
  • Special characters {@code "-"}, {@code "_"}, {@code "."}, and {@code "*"} stay the same.
  • *
  • A sequence "{@code %xy}" is interpreted as a hexadecimal representation of the character.
  • *
* @param source the encoded String * @param charset the character set * @return the decoded value * @throws IllegalArgumentException when the given source contains invalid encoded sequences * @see java.net.URLDecoder#decode(String, String) */ public static String decode(@NonNull String source, Charset charset) { int length = source.length(); if (length == 0) { return source; } Assert.notNull(charset, "Charset must not be null"); ByteArrayOutputStream baos = new ByteArrayOutputStream(length); boolean changed = false; for (int i = 0; i < length; i++) { int ch = source.charAt(i); if (ch == '%') { if (i + 2 < length) { char hex1 = source.charAt(i + 1); char hex2 = source.charAt(i + 2); int u = Character.digit(hex1, 16); int l = Character.digit(hex2, 16); if (u == -1 || l == -1) { throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\""); } baos.write((char) ((u << 4) + l)); i += 2; changed = true; } else { throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\""); } } else { baos.write(ch); } } return (changed ? baos.toString(charset) : source); } /** * Extract the file extension from the given URI path. * @param path the URI path (e.g. "/products/index.html") * @return the extracted file extension (e.g. "html") */ @Nullable public static String extractFileExtension(@NonNull String path) { int end = path.indexOf('?'); int fragmentIndex = path.indexOf('#'); if (fragmentIndex != -1 && (end == -1 || fragmentIndex < end)) { end = fragmentIndex; } if (end == -1) { end = path.length(); } int begin = path.lastIndexOf('/', end) + 1; int paramIndex = path.indexOf(';', begin); end = (paramIndex != -1 && paramIndex < end ? paramIndex : end); int extIndex = path.lastIndexOf('.', end); if (extIndex != -1 && extIndex >= begin) { return path.substring(extIndex + 1, end); } return null; } // Nested types /** * Enumeration used to identify the allowed characters per URI component. *

Contains methods to indicate whether a given character is valid in a specific URI component. * @see RFC 3986 */ enum UriComponentsType { SCHEME { @Override public boolean isAllowed(int c) { return isAlpha(c) || isDigit(c) || '+' == c || '-' == c || '.' == c; } }, AUTHORITY { @Override public boolean isAllowed(int c) { return isUnreserved(c) || isSubDelimiter(c) || ':' == c || '@' == c; } }, USER_INFO { @Override public boolean isAllowed(int c) { return isUnreserved(c) || isSubDelimiter(c) || ':' == c; } }, HOST_IPV4 { @Override public boolean isAllowed(int c) { return isUnreserved(c) || isSubDelimiter(c); } }, HOST_IPV6 { @Override public boolean isAllowed(int c) { return isUnreserved(c) || isSubDelimiter(c) || '[' == c || ']' == c || ':' == c; } }, PORT { @Override public boolean isAllowed(int c) { return isDigit(c); } }, PATH { @Override public boolean isAllowed(int c) { return isPchar(c) || '/' == c; } }, PATH_SEGMENT { @Override public boolean isAllowed(int c) { return isPchar(c); } }, QUERY { @Override public boolean isAllowed(int c) { return isPchar(c) || '/' == c || '?' == c; } }, QUERY_PARAM { @Override public boolean isAllowed(int c) { if ('=' == c || '&' == c) { return false; } else { return isPchar(c) || '/' == c || '?' == c; } } }, FRAGMENT { @Override public boolean isAllowed(int c) { return isPchar(c) || '/' == c || '?' == c; } }, URI { @Override public boolean isAllowed(int c) { return isUnreserved(c); } }; /** * Indicates whether the given character is allowed in this URI component. * @return {@code true} if the character is allowed; {@code false} otherwise */ public abstract boolean isAllowed(int c); /** * Indicates whether the given character is in the {@code ALPHA} set. * @see RFC 3986, appendix A */ protected boolean isAlpha(int c) { return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'); } /** * Indicates whether the given character is in the {@code DIGIT} set. 
* @see RFC 3986, appendix A */ protected boolean isDigit(int c) { return (c >= '0' && c <= '9'); } /** * Indicates whether the given character is in the {@code gen-delims} set. * @see RFC 3986, appendix A */ protected boolean isGenericDelimiter(int c) { return (':' == c || '/' == c || '?' == c || '#' == c || '[' == c || ']' == c || '@' == c); } /** * Indicates whether the given character is in the {@code sub-delims} set. * @see RFC 3986, appendix A */ protected boolean isSubDelimiter(int c) { return ('!' == c || '$' == c || '&' == c || '\'' == c || '(' == c || ')' == c || '*' == c || '+' == c || ',' == c || ';' == c || '=' == c); } /** * Indicates whether the given character is in the {@code reserved} set. * @see RFC 3986, appendix A */ protected boolean isReserved(int c) { return (isGenericDelimiter(c) || isSubDelimiter(c)); } /** * Indicates whether the given character is in the {@code unreserved} set. * @see RFC 3986, appendix A */ protected boolean isUnreserved(int c) { return (isAlpha(c) || isDigit(c) || '-' == c || '.' == c || '_' == c || '~' == c); } /** * Indicates whether the given character is in the {@code pchar} set. * @see RFC 3986, appendix A */ protected boolean isPchar(int c) { return (isUnreserved(c) || isSubDelimiter(c) || ':' == c || '@' == c); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy