All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.springframework.web.util.UriUtils Maven / Gradle / Ivy

There is a newer version: 6.1.13
Show newest version
/*
 * Copyright 2002-2010 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.springframework.web.util;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.BitSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.springframework.util.Assert;

/**
 * Utility class for URI encoding and decoding based on RFC 3986. Offers encoding methods for
 * the various URI components.
 *
 * 

All {@code encode*(String, String} methods in this class operate in a similar way: *

    *
  • Valid characters for the specific URI component as defined in RFC 3986 stay the same. *
  • All other characters are converted into one or more bytes in the given encoding scheme. * Each of the resulting bytes is written as a hexadecimal string in the "%xy" format. *
* * @author Arjen Poutsma * @since 3.0 * @see RFC 3986 */ public abstract class UriUtils { private static final BitSet SCHEME; private static final BitSet USER_INFO; private static final BitSet HOST; private static final BitSet PORT; private static final BitSet PATH; private static final BitSet SEGMENT; private static final BitSet QUERY; private static final BitSet QUERY_PARAM; private static final BitSet FRAGMENT; private static final String SCHEME_PATTERN = "([^:/?#]+):"; private static final String HTTP_PATTERN = "(http|https):"; private static final String USERINFO_PATTERN = "([^@/]*)"; private static final String HOST_PATTERN = "([^/?#:]*)"; private static final String PORT_PATTERN = "(\\d*)"; private static final String PATH_PATTERN = "([^?#]*)"; private static final String QUERY_PATTERN = "([^#]*)"; private static final String LAST_PATTERN = "(.*)"; // Regex patterns that matches URIs. See RFC 3986, appendix B private static final Pattern URI_PATTERN = Pattern.compile( "^(" + SCHEME_PATTERN + ")?" + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN + ")?" + ")?" + PATH_PATTERN + "(\\?" + QUERY_PATTERN + ")?" + "(#" + LAST_PATTERN + ")?"); private static final Pattern HTTP_URL_PATTERN = Pattern.compile( "^" + HTTP_PATTERN + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN + ")?" + ")?" + PATH_PATTERN + "(\\?" + LAST_PATTERN + ")?"); static { // variable names refer to RFC 3986, appendix A BitSet alpha = new BitSet(256); for (int i = 'a'; i <= 'z'; i++) { alpha.set(i); } for (int i = 'A'; i <= 'Z'; i++) { alpha.set(i); } BitSet digit = new BitSet(256); for (int i = '0'; i <= '9'; i++) { digit.set(i); } BitSet gendelims = new BitSet(256); gendelims.set(':'); gendelims.set('/'); gendelims.set('?'); gendelims.set('#'); gendelims.set('['); gendelims.set(']'); gendelims.set('@'); BitSet subdelims = new BitSet(256); subdelims.set('!'); subdelims.set('$'); subdelims.set('&'); subdelims.set('\''); subdelims.set('('); subdelims.set(')'); subdelims.set('*'); subdelims.set('+'); subdelims.set(','); subdelims.set(';'); subdelims.set('='); BitSet reserved = new BitSet(256); reserved.or(gendelims); reserved.or(subdelims); BitSet unreserved = new BitSet(256); unreserved.or(alpha); unreserved.or(digit); unreserved.set('-'); unreserved.set('.'); unreserved.set('_'); unreserved.set('~'); SCHEME = new BitSet(256); SCHEME.or(alpha); SCHEME.or(digit); SCHEME.set('+'); SCHEME.set('-'); SCHEME.set('.'); USER_INFO = new BitSet(256); USER_INFO.or(unreserved); USER_INFO.or(subdelims); USER_INFO.set(':'); HOST = new BitSet(256); HOST.or(unreserved); HOST.or(subdelims); PORT = new BitSet(256); PORT.or(digit); BitSet pchar = new BitSet(256); pchar.or(unreserved); pchar.or(subdelims); pchar.set(':'); pchar.set('@'); SEGMENT = new BitSet(256); SEGMENT.or(pchar); PATH = new BitSet(256); PATH.or(SEGMENT); PATH.set('/'); QUERY = new BitSet(256); QUERY.or(pchar); QUERY.set('/'); QUERY.set('?'); QUERY_PARAM = new BitSet(256); QUERY_PARAM.or(pchar); QUERY_PARAM.set('/'); QUERY_PARAM.set('?'); QUERY_PARAM.clear('='); QUERY_PARAM.clear('+'); QUERY_PARAM.clear('&'); FRAGMENT = new BitSet(256); FRAGMENT.or(pchar); FRAGMENT.set('/'); FRAGMENT.set('?'); } /** * Encodes the given source URI into an encoded String. All various URI components * are encoded according to their respective valid character sets. * @param uri the URI to be encoded * @param encoding the character encoding to encode to * @return the encoded URI * @throws IllegalArgumentException when the given uri parameter is not a valid URI * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodeUri(String uri, String encoding) throws UnsupportedEncodingException { Assert.notNull(uri, "'uri' must not be null"); Assert.hasLength(encoding, "'encoding' must not be empty"); Matcher m = URI_PATTERN.matcher(uri); if (m.matches()) { String scheme = m.group(2); String authority = m.group(3); String userinfo = m.group(5); String host = m.group(6); String port = m.group(8); String path = m.group(9); String query = m.group(11); String fragment = m.group(13); return encodeUriComponents(scheme, authority, userinfo, host, port, path, query, fragment, encoding); } else { throw new IllegalArgumentException("[" + uri + "] is not a valid URI"); } } /** * Encodes the given HTTP URI into an encoded String. All various URI components * are encoded according to their respective valid character sets. *

Note that this method does not support fragments ({@code #}), * as these are not supposed to be sent to the server, but retained by the client. * @param httpUrl the HTTP URL to be encoded * @param encoding the character encoding to encode to * @return the encoded URL * @throws IllegalArgumentException when the given uri parameter is not a valid URI * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodeHttpUrl(String httpUrl, String encoding) throws UnsupportedEncodingException { Assert.notNull(httpUrl, "'httpUrl' must not be null"); Assert.hasLength(encoding, "'encoding' must not be empty"); Matcher m = HTTP_URL_PATTERN.matcher(httpUrl); if (m.matches()) { String scheme = m.group(1); String authority = m.group(2); String userinfo = m.group(4); String host = m.group(5); String portString = m.group(7); String path = m.group(8); String query = m.group(10); return encodeUriComponents(scheme, authority, userinfo, host, portString, path, query, null, encoding); } else { throw new IllegalArgumentException("[" + httpUrl + "] is not a valid HTTP URL"); } } /** * Encodes the given source URI components into an encoded String. * All various URI components are optional, but encoded according * to their respective valid character sets. * @param scheme the scheme * @param authority the authority * @param userinfo the user info * @param host the host * @param port the port * @param path the path * @param query the query * @param fragment the fragment * @param encoding the character encoding to encode to * @return the encoded URI * @throws IllegalArgumentException when the given uri parameter is not a valid URI * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodeUriComponents(String scheme, String authority, String userinfo, String host, String port, String path, String query, String fragment, String encoding) throws UnsupportedEncodingException { Assert.hasLength(encoding, "'encoding' must not be empty"); StringBuilder sb = new StringBuilder(); if (scheme != null) { sb.append(encodeScheme(scheme, encoding)); sb.append(':'); } if (authority != null) { sb.append("//"); if (userinfo != null) { sb.append(encodeUserInfo(userinfo, encoding)); sb.append('@'); } if (host != null) { sb.append(encodeHost(host, encoding)); } if (port != null) { sb.append(':'); sb.append(encodePort(port, encoding)); } } sb.append(encodePath(path, encoding)); if (query != null) { sb.append('?'); sb.append(encodeQuery(query, encoding)); } if (fragment != null) { sb.append('#'); sb.append(encodeFragment(fragment, encoding)); } return sb.toString(); } /** * Encodes the given URI scheme. * @param scheme the scheme to be encoded * @param encoding the character encoding to encode to * @return the encoded scheme * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodeScheme(String scheme, String encoding) throws UnsupportedEncodingException { return encode(scheme, encoding, SCHEME); } /** * Encodes the given URI user info. * @param userInfo the user info to be encoded * @param encoding the character encoding to encode to * @return the encoded user info * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodeUserInfo(String userInfo, String encoding) throws UnsupportedEncodingException { return encode(userInfo, encoding, USER_INFO); } /** * Encodes the given URI host. * @param host the host to be encoded * @param encoding the character encoding to encode to * @return the encoded host * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodeHost(String host, String encoding) throws UnsupportedEncodingException { return encode(host, encoding, HOST); } /** * Encodes the given URI port. * @param port the port to be encoded * @param encoding the character encoding to encode to * @return the encoded port * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodePort(String port, String encoding) throws UnsupportedEncodingException { return encode(port, encoding, PORT); } /** * Encodes the given URI path. * @param path the path to be encoded * @param encoding the character encoding to encode to * @return the encoded path * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodePath(String path, String encoding) throws UnsupportedEncodingException { return encode(path, encoding, PATH); } /** * Encodes the given URI path segment. * @param segment the segment to be encoded * @param encoding the character encoding to encode to * @return the encoded segment * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodePathSegment(String segment, String encoding) throws UnsupportedEncodingException { return encode(segment, encoding, SEGMENT); } /** * Encodes the given URI query. * @param query the query to be encoded * @param encoding the character encoding to encode to * @return the encoded query * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodeQuery(String query, String encoding) throws UnsupportedEncodingException { return encode(query, encoding, QUERY); } /** * Encodes the given URI query parameter. * @param queryParam the query parameter to be encoded * @param encoding the character encoding to encode to * @return the encoded query parameter * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodeQueryParam(String queryParam, String encoding) throws UnsupportedEncodingException { return encode(queryParam, encoding, QUERY_PARAM); } /** * Encodes the given URI fragment. * @param fragment the fragment to be encoded * @param encoding the character encoding to encode to * @return the encoded fragment * @throws UnsupportedEncodingException when the given encoding parameter is not supported */ public static String encodeFragment(String fragment, String encoding) throws UnsupportedEncodingException { return encode(fragment, encoding, FRAGMENT); } private static String encode(String source, String encoding, BitSet notEncoded) throws UnsupportedEncodingException { Assert.notNull(source, "'source' must not be null"); Assert.hasLength(encoding, "'encoding' must not be empty"); byte[] bytes = encode(source.getBytes(encoding), notEncoded); return new String(bytes, "US-ASCII"); } private static byte[] encode(byte[] source, BitSet notEncoded) { Assert.notNull(source, "'source' must not be null"); ByteArrayOutputStream bos = new ByteArrayOutputStream(source.length * 2); for (int i = 0; i < source.length; i++) { int b = source[i]; if (b < 0) { b += 256; } if (notEncoded.get(b)) { bos.write(b); } else { bos.write('%'); char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); bos.write(hex1); bos.write(hex2); } } return bos.toByteArray(); } /** * Decodes the given encoded source String into an URI. Based on the following * rules: *

    *
  • Alphanumeric characters {@code "a"} through {@code "z"}, * {@code "A"} through {@code "Z"}, and {@code "0"} through {@code "9"} * stay the same. *
  • Special characters {@code "-"}, {@code "_"}, {@code "."}, and * {@code "*"} stay the same. *
  • All other characters are converted into one or more bytes using the * given encoding scheme. Each of the resulting bytes is written as a * hexadecimal string in the {@code %xy} format. *
  • A sequence "%xy" is interpreted as a hexadecimal * representation of the character. *
* @param source the source string * @param encoding the encoding * @return the decoded URI * @throws UnsupportedEncodingException when the given encoding parameter is not supported * @see java.net.URLDecoder#decode(String, String) */ public static String decode(String source, String encoding) throws UnsupportedEncodingException { Assert.notNull(source, "'source' must not be null"); Assert.hasLength(encoding, "'encoding' must not be empty"); int length = source.length(); ByteArrayOutputStream bos = new ByteArrayOutputStream(length); boolean changed = false; for (int i = 0; i < length; i++) { int ch = source.charAt(i); if (ch == '%') { if ((i + 2) < length) { char hex1 = source.charAt(i + 1); char hex2 = source.charAt(i + 2); int u = Character.digit(hex1, 16); int l = Character.digit(hex2, 16); bos.write((char) ((u << 4) + l)); i += 2; changed = true; } else { throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\""); } } else { bos.write(ch); } } return changed ? new String(bos.toByteArray(), encoding) : source; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy