All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.lockss.util.SpringWeb307UriUtils Maven / Gradle / Ivy

The newest version!
/*
Copyright (c) 2000-2017 Board of Trustees of Leland Stanford Jr. University,
all rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
STANFORD UNIVERSITY BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Except as contained in this notice, the name of Stanford University shall not
be used in advertising or otherwise to promote the sale, use or other dealings
in this Software without prior written authorization from Stanford University.

*/

/*
 * Copyright 2002-2010 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.lockss.util;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.BitSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.springframework.util.Assert;

/**
 * Utility class for URI encoding and decoding based on RFC 3986. Offers encoding methods for
 * the various URI components.
 *
 * <p>All {@code encode*(String, String)} methods in this class operate in a similar way:
 * <ul>
 * <li>Valid characters for the specific URI component as defined in RFC 3986 stay the same.</li>
 * <li>All other characters are converted into one or more bytes in the given encoding scheme.
 * Each of the resulting bytes is written as a hexadecimal string in the "%xy" format.</li>
 * </ul>
 *
 * <p>Copied from Spring Web 3.0.7, as this functionality appears to have
 * been removed from Spring Web 4.
 *
 * <p>Argument validation is done by private helpers in this class; invalid arguments are
 * reported with {@link IllegalArgumentException}.
 *
 * @author Arjen Poutsma
 * @since 3.0
 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>
 */
public abstract class SpringWeb307UriUtils {

  // Sets of characters (indexed by byte value) that may appear unescaped in each component.
  private static final BitSet SCHEME;

  private static final BitSet USER_INFO;

  private static final BitSet HOST;

  private static final BitSet PORT;

  private static final BitSet PATH;

  private static final BitSet SEGMENT;

  private static final BitSet QUERY;

  private static final BitSet QUERY_PARAM;

  private static final BitSet FRAGMENT;

  private static final String SCHEME_PATTERN = "([^:/?#]+):";

  private static final String HTTP_PATTERN = "(http|https):";

  private static final String USERINFO_PATTERN = "([^@/]*)";

  private static final String HOST_PATTERN = "([^/?#:]*)";

  private static final String PORT_PATTERN = "(\\d*)";

  private static final String PATH_PATTERN = "([^?#]*)";

  private static final String QUERY_PATTERN = "([^#]*)";

  private static final String LAST_PATTERN = "(.*)";

  // Regex patterns that matches URIs. See RFC 3986, appendix B
  private static final Pattern URI_PATTERN = Pattern.compile(
      "^(" + SCHEME_PATTERN + ")?" + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN +
          ")?" + ")?" + PATH_PATTERN + "(\\?" + QUERY_PATTERN + ")?" + "(#" + LAST_PATTERN + ")?");

  private static final Pattern HTTP_URL_PATTERN = Pattern.compile(
      "^" + HTTP_PATTERN + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN + ")?" +
          ")?" + PATH_PATTERN + "(\\?" + LAST_PATTERN + ")?");

  static {
    // variable names refer to RFC 3986, appendix A
    BitSet alpha = new BitSet(256);
    for (int i = 'a'; i <= 'z'; i++) {
      alpha.set(i);
    }
    for (int i = 'A'; i <= 'Z'; i++) {
      alpha.set(i);
    }

    BitSet digit = new BitSet(256);
    for (int i = '0'; i <= '9'; i++) {
      digit.set(i);
    }

    BitSet subdelims = new BitSet(256);
    subdelims.set('!');
    subdelims.set('$');
    subdelims.set('&');
    subdelims.set('\'');
    subdelims.set('(');
    subdelims.set(')');
    subdelims.set('*');
    subdelims.set('+');
    subdelims.set(',');
    subdelims.set(';');
    subdelims.set('=');

    BitSet unreserved = new BitSet(256);
    unreserved.or(alpha);
    unreserved.or(digit);
    unreserved.set('-');
    unreserved.set('.');
    unreserved.set('_');
    unreserved.set('~');

    SCHEME = new BitSet(256);
    SCHEME.or(alpha);
    SCHEME.or(digit);
    SCHEME.set('+');
    SCHEME.set('-');
    SCHEME.set('.');

    USER_INFO = new BitSet(256);
    USER_INFO.or(unreserved);
    USER_INFO.or(subdelims);
    USER_INFO.set(':');

    HOST = new BitSet(256);
    HOST.or(unreserved);
    HOST.or(subdelims);

    PORT = new BitSet(256);
    PORT.or(digit);

    BitSet pchar = new BitSet(256);
    pchar.or(unreserved);
    pchar.or(subdelims);
    pchar.set(':');
    pchar.set('@');

    SEGMENT = new BitSet(256);
    SEGMENT.or(pchar);

    PATH = new BitSet(256);
    PATH.or(SEGMENT);
    PATH.set('/');

    QUERY = new BitSet(256);
    QUERY.or(pchar);
    QUERY.set('/');
    QUERY.set('?');

    // A single query parameter must additionally escape the characters that
    // delimit parameters and values ('=', '&') as well as '+'.
    QUERY_PARAM = new BitSet(256);
    QUERY_PARAM.or(pchar);
    QUERY_PARAM.set('/');
    QUERY_PARAM.set('?');
    QUERY_PARAM.clear('=');
    QUERY_PARAM.clear('+');
    QUERY_PARAM.clear('&');

    FRAGMENT = new BitSet(256);
    FRAGMENT.or(pchar);
    FRAGMENT.set('/');
    FRAGMENT.set('?');
  }

  /**
   * Encodes the given source URI into an encoded String. All various URI components
   * are encoded according to their respective valid character sets.
   * @param uri the URI to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded URI
   * @throws IllegalArgumentException when the given uri parameter is not a valid URI
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodeUri(String uri, String encoding) throws UnsupportedEncodingException {
    requireNonNull(uri, "'uri' must not be null");
    requireHasLength(encoding, "'encoding' must not be empty");
    Matcher m = URI_PATTERN.matcher(uri);
    if (!m.matches()) {
      throw new IllegalArgumentException("[" + uri + "] is not a valid URI");
    }
    // Group numbers skip the wrapper groups that include the delimiters
    // ("scheme:", "userinfo@", ":port", "?query", "#fragment").
    String scheme = m.group(2);
    String authority = m.group(3);
    String userinfo = m.group(5);
    String host = m.group(6);
    String port = m.group(8);
    String path = m.group(9);
    String query = m.group(11);
    String fragment = m.group(13);

    return encodeUriComponents(scheme, authority, userinfo, host, port, path, query, fragment, encoding);
  }

  /**
   * Encodes the given HTTP URI into an encoded String. All various URI components
   * are encoded according to their respective valid character sets.
   * <p><strong>Note</strong> that this method does not support fragments ({@code #}),
   * as these are not supposed to be sent to the server, but retained by the client.
   * @param httpUrl the HTTP URL to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded URL
   * @throws IllegalArgumentException when the given uri parameter is not a valid URI
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodeHttpUrl(String httpUrl, String encoding) throws UnsupportedEncodingException {
    requireNonNull(httpUrl, "'httpUrl' must not be null");
    requireHasLength(encoding, "'encoding' must not be empty");
    Matcher m = HTTP_URL_PATTERN.matcher(httpUrl);
    if (!m.matches()) {
      throw new IllegalArgumentException("[" + httpUrl + "] is not a valid HTTP URL");
    }
    String scheme = m.group(1);
    String authority = m.group(2);
    String userinfo = m.group(4);
    String host = m.group(5);
    String portString = m.group(7);
    String path = m.group(8);
    String query = m.group(10);

    return encodeUriComponents(scheme, authority, userinfo, host, portString, path, query, null, encoding);
  }

  /**
   * Encodes the given source URI components into an encoded String.
   * All various URI components are optional (may be {@code null}), but encoded according
   * to their respective valid character sets.
   * <p>The {@code authority} argument is only tested for {@code null}: when it is
   * {@code null}, user info, host and port are all omitted.
   * @param scheme the scheme
   * @param authority the authority
   * @param userinfo the user info
   * @param host the host
   * @param port the port
   * @param path the path
   * @param query the query
   * @param fragment the fragment
   * @param encoding the character encoding to encode to
   * @return the encoded URI
   * @throws IllegalArgumentException when the given encoding parameter is {@code null} or empty
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodeUriComponents(String scheme,
                                           String authority,
                                           String userinfo,
                                           String host,
                                           String port,
                                           String path,
                                           String query,
                                           String fragment,
                                           String encoding) throws UnsupportedEncodingException {

    requireHasLength(encoding, "'encoding' must not be empty");
    StringBuilder sb = new StringBuilder();

    if (scheme != null) {
      sb.append(encodeScheme(scheme, encoding));
      sb.append(':');
    }

    if (authority != null) {
      sb.append("//");
      if (userinfo != null) {
        sb.append(encodeUserInfo(userinfo, encoding));
        sb.append('@');
      }
      if (host != null) {
        sb.append(encodeHost(host, encoding));
      }
      if (port != null) {
        sb.append(':');
        sb.append(encodePort(port, encoding));
      }
    }

    // The path is optional like every other component; a null path is skipped
    // instead of being rejected.
    if (path != null) {
      sb.append(encodePath(path, encoding));
    }

    if (query != null) {
      sb.append('?');
      sb.append(encodeQuery(query, encoding));
    }

    if (fragment != null) {
      sb.append('#');
      sb.append(encodeFragment(fragment, encoding));
    }

    return sb.toString();
  }

  /**
   * Encodes the given URI scheme.
   * @param scheme the scheme to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded scheme
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodeScheme(String scheme, String encoding) throws UnsupportedEncodingException {
    return encode(scheme, encoding, SCHEME);
  }

  /**
   * Encodes the given URI user info.
   * @param userInfo the user info to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded user info
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodeUserInfo(String userInfo, String encoding) throws UnsupportedEncodingException {
    return encode(userInfo, encoding, USER_INFO);
  }

  /**
   * Encodes the given URI host.
   * @param host the host to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded host
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodeHost(String host, String encoding) throws UnsupportedEncodingException {
    return encode(host, encoding, HOST);
  }

  /**
   * Encodes the given URI port.
   * @param port the port to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded port
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodePort(String port, String encoding) throws UnsupportedEncodingException {
    return encode(port, encoding, PORT);
  }

  /**
   * Encodes the given URI path.
   * @param path the path to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded path
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodePath(String path, String encoding) throws UnsupportedEncodingException {
    return encode(path, encoding, PATH);
  }

  /**
   * Encodes the given URI path segment. Unlike {@link #encodePath(String, String)},
   * a {@code '/'} in the segment is escaped.
   * @param segment the segment to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded segment
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodePathSegment(String segment, String encoding) throws UnsupportedEncodingException {
    return encode(segment, encoding, SEGMENT);
  }

  /**
   * Encodes the given URI query.
   * @param query the query to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded query
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodeQuery(String query, String encoding) throws UnsupportedEncodingException {
    return encode(query, encoding, QUERY);
  }

  /**
   * Encodes the given URI query parameter. In addition to the characters escaped by
   * {@link #encodeQuery(String, String)}, the characters {@code '='}, {@code '+'} and
   * {@code '&'} are escaped.
   * @param queryParam the query parameter to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded query parameter
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodeQueryParam(String queryParam, String encoding) throws UnsupportedEncodingException {
    return encode(queryParam, encoding, QUERY_PARAM);
  }

  /**
   * Encodes the given URI fragment.
   * @param fragment the fragment to be encoded
   * @param encoding the character encoding to encode to
   * @return the encoded fragment
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   */
  public static String encodeFragment(String fragment, String encoding) throws UnsupportedEncodingException {
    return encode(fragment, encoding, FRAGMENT);
  }

  /**
   * Converts {@code source} to bytes in {@code encoding} and percent-escapes every byte
   * that is not a member of {@code notEncoded}.
   */
  private static String encode(String source, String encoding, BitSet notEncoded)
      throws UnsupportedEncodingException {
    requireNonNull(source, "'source' must not be null");
    requireHasLength(encoding, "'encoding' must not be empty");

    byte[] bytes = encode(source.getBytes(encoding), notEncoded);
    // The escaped form only contains members of the allowed set and "%XY" triplets.
    return new String(bytes, "US-ASCII");
  }

  /**
   * Percent-escapes every byte of {@code source} that is not a member of
   * {@code notEncoded}, using upper-case hexadecimal digits.
   */
  private static byte[] encode(byte[] source, BitSet notEncoded) {
    requireNonNull(source, "'source' must not be null");

    ByteArrayOutputStream bos = new ByteArrayOutputStream(source.length * 2);
    for (byte raw : source) {
      int b = raw & 0xFF; // treat the byte as unsigned
      if (notEncoded.get(b)) {
        bos.write(b);
      } else {
        bos.write('%');
        bos.write(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)));
        bos.write(Character.toUpperCase(Character.forDigit(b & 0xF, 16)));
      }
    }
    return bos.toByteArray();
  }

  /**
   * Decodes the given encoded source String into an URI. Based on the following
   * rules:
   * <ul>
   * <li>A sequence "{@code %xy}" is interpreted as the hexadecimal representation of
   * one byte.</li>
   * <li>Every other ASCII character contributes its own value as one byte; in particular
   * {@code "+"} is <em>not</em> turned into a space.</li>
   * <li>Literal non-ASCII characters are converted to bytes using the given encoding.</li>
   * <li>The resulting byte sequence is converted to a String using the given encoding.</li>
   * </ul>
   * If the source contains no "{@code %xy}" sequence at all, it is returned unchanged.
   * @param source the source string
   * @param encoding the encoding
   * @return the decoded URI
   * @throws IllegalArgumentException when the given source contains a truncated or
   * non-hexadecimal "{@code %xy}" sequence
   * @throws UnsupportedEncodingException when the given encoding parameter is not supported
   * @see java.net.URLDecoder#decode(String, String)
   */
  public static String decode(String source, String encoding) throws UnsupportedEncodingException {
    requireNonNull(source, "'source' must not be null");
    requireHasLength(encoding, "'encoding' must not be empty");

    int length = source.length();
    ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
    boolean changed = false;
    int i = 0;
    while (i < length) {
      char ch = source.charAt(i);
      if (ch == '%') {
        if (i + 2 >= length) {
          throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
        }
        int u = Character.digit(source.charAt(i + 1), 16);
        int l = Character.digit(source.charAt(i + 2), 16);
        // Character.digit signals a non-hex character with -1; reject it instead of
        // silently emitting a garbage byte.
        if (u == -1 || l == -1) {
          throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
        }
        bos.write((u << 4) + l);
        i += 3;
        changed = true;
      } else if (ch < 0x80) {
        // ASCII literals are written as raw bytes; the encode methods leave allowed
        // bytes unescaped, so this keeps decode their inverse.
        bos.write(ch);
        i++;
      } else {
        // Writing a non-ASCII char directly would keep only its low 8 bits. Convert the
        // whole run at once so that surrogate pairs stay together.
        int runEnd = i + 1;
        while (runEnd < length && source.charAt(runEnd) >= 0x80) {
          runEnd++;
        }
        byte[] literal = source.substring(i, runEnd).getBytes(encoding);
        bos.write(literal, 0, literal.length);
        i = runEnd;
      }
    }
    return changed ? new String(bos.toByteArray(), encoding) : source;
  }

  /** Throws {@link IllegalArgumentException} with the given message if {@code value} is null. */
  private static void requireNonNull(Object value, String message) {
    if (value == null) {
      throw new IllegalArgumentException(message);
    }
  }

  /** Throws {@link IllegalArgumentException} with the given message if {@code text} is null or empty. */
  private static void requireHasLength(String text, String message) {
    if (text == null || text.length() == 0) {
      throw new IllegalArgumentException(message);
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy