All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.http.client.utils.URLEncodedUtils Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.http.client.utils;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.List;
import java.util.Scanner;

import org.apache.http.Consts;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.entity.ContentType;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.message.ParserCursor;
import org.apache.http.message.TokenParser;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.Args;
import org.apache.http.util.CharArrayBuffer;

/**
 * A collection of utilities for encoding URLs.
 *
 * @since 4.0
 */
public class URLEncodedUtils {

    /**
     * The default HTML form content type.
     */
    public static final String CONTENT_TYPE = "application/x-www-form-urlencoded";

    private static final char QP_SEP_A = '&';
    private static final char QP_SEP_S = ';';
    private static final String NAME_VALUE_SEPARATOR = "=";
    private static final char PATH_SEPARATOR = '/';

    private static final BitSet PATH_SEPARATORS     = new BitSet(256);
    static {
        PATH_SEPARATORS.set(PATH_SEPARATOR);
    }

    /**
     * @deprecated 4.5 Use {@link #parse(URI, Charset)}
     */
    @Deprecated
    public static List  parse(final URI uri, final String charsetName) {
        return parse(uri, charsetName != null ? Charset.forName(charsetName) : null);
    }

    /**
     * Returns a list of {@link NameValuePair}s URI query parameters.
     * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
     *
     * @param uri input URI.
     * @param charset parameter charset.
     * @return list of query parameters.
     *
     * @since 4.5
     */
    public static List  parse(final URI uri, final Charset charset) {
        Args.notNull(uri, "URI");
        final String query = uri.getRawQuery();
        if (query != null && !query.isEmpty()) {
            return parse(query, charset);
        }
        return createEmptyList();
    }

    /**
     * Returns a list of {@link NameValuePair NameValuePairs} as parsed from an {@link HttpEntity}.
     * The encoding is taken from the entity's Content-Encoding header.
     * 

* This is typically used while parsing an HTTP POST. * * @param entity * The entity to parse * @return a list of {@link NameValuePair} as built from the URI's query portion. * @throws IOException * If there was an exception getting the entity's data. */ public static List parse( final HttpEntity entity) throws IOException { Args.notNull(entity, "HTTP entity"); final ContentType contentType = ContentType.get(entity); if (contentType == null || !contentType.getMimeType().equalsIgnoreCase(CONTENT_TYPE)) { return createEmptyList(); } final long len = entity.getContentLength(); Args.check(len <= Integer.MAX_VALUE, "HTTP entity is too large"); final Charset charset = contentType.getCharset() != null ? contentType.getCharset() : HTTP.DEF_CONTENT_CHARSET; final InputStream inStream = entity.getContent(); if (inStream == null) { return createEmptyList(); } final CharArrayBuffer buf; try { buf = new CharArrayBuffer(len > 0 ? (int) len : 1024); final Reader reader = new InputStreamReader(inStream, charset); final char[] tmp = new char[1024]; int l; while((l = reader.read(tmp)) != -1) { buf.append(tmp, 0, l); } } finally { inStream.close(); } if (buf.isEmpty()) { return createEmptyList(); } return parse(buf, charset, QP_SEP_A); } /** * Returns true if the entity's Content-Type header is * {@code application/x-www-form-urlencoded}. */ public static boolean isEncoded(final HttpEntity entity) { Args.notNull(entity, "HTTP entity"); final Header h = entity.getContentType(); if (h != null) { final HeaderElement[] elems = h.getElements(); if (elems.length > 0) { final String contentType = elems[0].getName(); return contentType.equalsIgnoreCase(CONTENT_TYPE); } } return false; } /** * Adds all parameters within the Scanner to the list of {@code parameters}, as encoded by * {@code encoding}. For example, a scanner containing the string {@code a=1&b=2&c=3} would add the * {@link NameValuePair NameValuePairs} a=1, b=2, and c=3 to the list of parameters. 
By convention, {@code '&'} and * {@code ';'} are accepted as parameter separators. * * @param parameters * List to add parameters to. * @param scanner * Input that contains the parameters to parse. * @param charset * Encoding to use when decoding the parameters. * * @deprecated (4.4) use {@link #parse(String, java.nio.charset.Charset)} */ @Deprecated public static void parse( final List parameters, final Scanner scanner, final String charset) { parse(parameters, scanner, "[" + QP_SEP_A + QP_SEP_S + "]", charset); } /** * Adds all parameters within the Scanner to the list of * {@code parameters}, as encoded by {@code encoding}. For * example, a scanner containing the string {@code a=1&b=2&c=3} would * add the {@link NameValuePair NameValuePairs} a=1, b=2, and c=3 to the * list of parameters. * * @param parameters * List to add parameters to. * @param scanner * Input that contains the parameters to parse. * @param parameterSepartorPattern * The Pattern string for parameter separators, by convention {@code "[&;]"} * @param charset * Encoding to use when decoding the parameters. * * @deprecated (4.4) use {@link #parse(org.apache.http.util.CharArrayBuffer, java.nio.charset.Charset, char...)} */ @Deprecated public static void parse( final List parameters, final Scanner scanner, final String parameterSepartorPattern, final String charset) { scanner.useDelimiter(parameterSepartorPattern); while (scanner.hasNext()) { final String name; final String value; final String token = scanner.next(); final int i = token.indexOf(NAME_VALUE_SEPARATOR); if (i != -1) { name = decodeFormFields(token.substring(0, i).trim(), charset); value = decodeFormFields(token.substring(i + 1).trim(), charset); } else { name = decodeFormFields(token.trim(), charset); value = null; } parameters.add(new BasicNameValuePair(name, value)); } } /** * Returns a list of {@link NameValuePair}s URI query parameters. * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators. 
* * @param s URI query component. * @param charset charset to use when decoding the parameters. * @return list of query parameters. * * @since 4.2 */ public static List parse(final String s, final Charset charset) { if (s == null) { return createEmptyList(); } final CharArrayBuffer buffer = new CharArrayBuffer(s.length()); buffer.append(s); return parse(buffer, charset, QP_SEP_A, QP_SEP_S); } /** * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character * encoding. * * @param s input text. * @param charset parameter charset. * @param separators parameter separators. * @return list of query parameters. * * @since 4.3 */ public static List parse(final String s, final Charset charset, final char... separators) { if (s == null) { return createEmptyList(); } final CharArrayBuffer buffer = new CharArrayBuffer(s.length()); buffer.append(s); return parse(buffer, charset, separators); } /** * Returns a list of {@link NameValuePair}s parameters. * * @param buf * text to parse. * @param charset * Encoding to use when decoding the parameters. * @param separators * element separators. * @return a list of {@link NameValuePair} as built from the URI's query portion. * * @since 4.4 */ public static List parse( final CharArrayBuffer buf, final Charset charset, final char... 
separators) { Args.notNull(buf, "Char array buffer"); final TokenParser tokenParser = TokenParser.INSTANCE; final BitSet delimSet = new BitSet(); for (final char separator: separators) { delimSet.set(separator); } final ParserCursor cursor = new ParserCursor(0, buf.length()); final List list = new ArrayList(); while (!cursor.atEnd()) { delimSet.set('='); final String name = tokenParser.parseToken(buf, cursor, delimSet); String value = null; if (!cursor.atEnd()) { final int delim = buf.charAt(cursor.getPos()); cursor.updatePos(cursor.getPos() + 1); if (delim == '=') { delimSet.clear('='); value = tokenParser.parseToken(buf, cursor, delimSet); if (!cursor.atEnd()) { cursor.updatePos(cursor.getPos() + 1); } } } if (!name.isEmpty()) { list.add(new BasicNameValuePair( decodeFormFields(name, charset), decodeFormFields(value, charset))); } } return list; } static List splitSegments(final CharSequence s, final BitSet separators) { final ParserCursor cursor = new ParserCursor(0, s.length()); // Skip leading separator if (cursor.atEnd()) { return Collections.emptyList(); } if (separators.get(s.charAt(cursor.getPos()))) { cursor.updatePos(cursor.getPos() + 1); } final List list = new ArrayList(); final StringBuilder buf = new StringBuilder(); for (;;) { if (cursor.atEnd()) { list.add(buf.toString()); break; } final char current = s.charAt(cursor.getPos()); if (separators.get(current)) { list.add(buf.toString()); buf.setLength(0); } else { buf.append(current); } cursor.updatePos(cursor.getPos() + 1); } return list; } static List splitPathSegments(final CharSequence s) { return splitSegments(s, PATH_SEPARATORS); } /** * Returns a list of URI path segments. * * @param s URI path component. * @param charset parameter charset. * @return list of segments. 
* * @since 4.5 */ public static List parsePathSegments(final CharSequence s, final Charset charset) { Args.notNull(s, "Char sequence"); final List list = splitPathSegments(s); for (int i = 0; i < list.size(); i++) { list.set(i, urlDecode(list.get(i), charset != null ? charset : Consts.UTF_8, false)); } return list; } /** * Returns a list of URI path segments. * * @param s URI path component. * @return list of segments. * * @since 4.5 */ public static List parsePathSegments(final CharSequence s) { return parsePathSegments(s, Consts.UTF_8); } /** * Returns a string consisting of joint encoded path segments. * * @param segments the segments. * @param charset parameter charset. * @return URI path component * * @since 4.5 */ public static String formatSegments(final Iterable segments, final Charset charset) { Args.notNull(segments, "Segments"); final StringBuilder result = new StringBuilder(); for (final String segment : segments) { result.append(PATH_SEPARATOR).append(urlEncode(segment, charset, PATHSAFE, false)); } return result.toString(); } /** * Returns a string consisting of joint encoded path segments. * * @param segments the segments. * @return URI path component * * @since 4.5 */ public static String formatSegments(final String... segments) { return formatSegments(Arrays.asList(segments), Consts.UTF_8); } /** * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded} * list of parameters in an HTTP PUT or HTTP POST. * * @param parameters The parameters to include. * @param charset The encoding to use. * @return An {@code application/x-www-form-urlencoded} string */ public static String format( final List parameters, final String charset) { return format(parameters, QP_SEP_A, charset); } /** * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded} * list of parameters in an HTTP PUT or HTTP POST. * * @param parameters The parameters to include. 
* @param parameterSeparator The parameter separator, by convention, {@code '&'} or {@code ';'}. * @param charset The encoding to use. * @return An {@code application/x-www-form-urlencoded} string * * @since 4.3 */ public static String format( final List parameters, final char parameterSeparator, final String charset) { final StringBuilder result = new StringBuilder(); for (final NameValuePair parameter : parameters) { final String encodedName = encodeFormFields(parameter.getName(), charset); final String encodedValue = encodeFormFields(parameter.getValue(), charset); if (result.length() > 0) { result.append(parameterSeparator); } result.append(encodedName); if (encodedValue != null) { result.append(NAME_VALUE_SEPARATOR); result.append(encodedValue); } } return result.toString(); } /** * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded} * list of parameters in an HTTP PUT or HTTP POST. * * @param parameters The parameters to include. * @param charset The encoding to use. * @return An {@code application/x-www-form-urlencoded} string * * @since 4.2 */ public static String format( final Iterable parameters, final Charset charset) { return format(parameters, QP_SEP_A, charset); } /** * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded} * list of parameters in an HTTP PUT or HTTP POST. * * @param parameters The parameters to include. * @param parameterSeparator The parameter separator, by convention, {@code '&'} or {@code ';'}. * @param charset The encoding to use. 
* @return An {@code application/x-www-form-urlencoded} string * * @since 4.3 */ public static String format( final Iterable parameters, final char parameterSeparator, final Charset charset) { Args.notNull(parameters, "Parameters"); final StringBuilder result = new StringBuilder(); for (final NameValuePair parameter : parameters) { final String encodedName = encodeFormFields(parameter.getName(), charset); final String encodedValue = encodeFormFields(parameter.getValue(), charset); if (result.length() > 0) { result.append(parameterSeparator); } result.append(encodedName); if (encodedValue != null) { result.append(NAME_VALUE_SEPARATOR); result.append(encodedValue); } } return result.toString(); } /** * Unreserved characters, i.e. alphanumeric, plus: {@code _ - ! . ~ ' ( ) *} *

* This list is the same as the {@code unreserved} list in * RFC 2396 */ private static final BitSet UNRESERVED = new BitSet(256); /** * Punctuation characters: , ; : $ & + = *

* These are the additional characters allowed by userinfo. */ private static final BitSet PUNCT = new BitSet(256); /** Characters which are safe to use in userinfo, * i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation */ private static final BitSet USERINFO = new BitSet(256); /** Characters which are safe to use in a path, * i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation plus / @ */ private static final BitSet PATHSAFE = new BitSet(256); /** Characters which are safe to use in a query or a fragment, * i.e. {@link #RESERVED} plus {@link #UNRESERVED} */ private static final BitSet URIC = new BitSet(256); /** * Reserved characters, i.e. {@code ;/?:@&=+$,[]} *

* This list is the same as the {@code reserved} list in * RFC 2396 * as augmented by * RFC 2732 */ private static final BitSet RESERVED = new BitSet(256); /** * Safe characters for x-www-form-urlencoded data, as per java.net.URLEncoder and browser behaviour, * i.e. alphanumeric plus {@code "-", "_", ".", "*"} */ private static final BitSet URLENCODER = new BitSet(256); private static final BitSet PATH_SPECIAL = new BitSet(256); static { // unreserved chars // alpha characters for (int i = 'a'; i <= 'z'; i++) { UNRESERVED.set(i); } for (int i = 'A'; i <= 'Z'; i++) { UNRESERVED.set(i); } // numeric characters for (int i = '0'; i <= '9'; i++) { UNRESERVED.set(i); } UNRESERVED.set('_'); // these are the charactes of the "mark" list UNRESERVED.set('-'); UNRESERVED.set('.'); UNRESERVED.set('*'); URLENCODER.or(UNRESERVED); // skip remaining unreserved characters UNRESERVED.set('!'); UNRESERVED.set('~'); UNRESERVED.set('\''); UNRESERVED.set('('); UNRESERVED.set(')'); // punct chars PUNCT.set(','); PUNCT.set(';'); PUNCT.set(':'); PUNCT.set('$'); PUNCT.set('&'); PUNCT.set('+'); PUNCT.set('='); // Safe for userinfo USERINFO.or(UNRESERVED); USERINFO.or(PUNCT); // URL path safe PATHSAFE.or(UNRESERVED); PATHSAFE.set(';'); // param separator PATHSAFE.set(':'); // RFC 2396 PATHSAFE.set('@'); PATHSAFE.set('&'); PATHSAFE.set('='); PATHSAFE.set('+'); PATHSAFE.set('$'); PATHSAFE.set(','); PATH_SPECIAL.or(PATHSAFE); PATH_SPECIAL.set('/'); RESERVED.set(';'); RESERVED.set('/'); RESERVED.set('?'); RESERVED.set(':'); RESERVED.set('@'); RESERVED.set('&'); RESERVED.set('='); RESERVED.set('+'); RESERVED.set('$'); RESERVED.set(','); RESERVED.set('['); // added by RFC 2732 RESERVED.set(']'); // added by RFC 2732 URIC.or(RESERVED); URIC.or(UNRESERVED); } private static final int RADIX = 16; private static List createEmptyList() { return new ArrayList(0); } private static String urlEncode( final String content, final Charset charset, final BitSet safechars, final boolean blankAsPlus) { if 
(content == null) { return null; } final StringBuilder buf = new StringBuilder(); final ByteBuffer bb = charset.encode(content); while (bb.hasRemaining()) { final int b = bb.get() & 0xff; if (safechars.get(b)) { buf.append((char) b); } else if (blankAsPlus && b == ' ') { buf.append('+'); } else { buf.append("%"); final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)); final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)); buf.append(hex1); buf.append(hex2); } } return buf.toString(); } /** * Decode/unescape a portion of a URL, to use with the query part ensure {@code plusAsBlank} is true. * * @param content the portion to decode * @param charset the charset to use * @param plusAsBlank if {@code true}, then convert '+' to space (e.g. for www-url-form-encoded content), otherwise leave as is. * @return encoded string */ private static String urlDecode( final String content, final Charset charset, final boolean plusAsBlank) { if (content == null) { return null; } final ByteBuffer bb = ByteBuffer.allocate(content.length()); final CharBuffer cb = CharBuffer.wrap(content); while (cb.hasRemaining()) { final char c = cb.get(); if (c == '%' && cb.remaining() >= 2) { final char uc = cb.get(); final char lc = cb.get(); final int u = Character.digit(uc, 16); final int l = Character.digit(lc, 16); if (u != -1 && l != -1) { bb.put((byte) ((u << 4) + l)); } else { bb.put((byte) '%'); bb.put((byte) uc); bb.put((byte) lc); } } else if (plusAsBlank && c == '+') { bb.put((byte) ' '); } else { bb.put((byte) c); } } bb.flip(); return charset.decode(bb).toString(); } /** * Decode/unescape www-url-form-encoded content. * * @param content the content to decode, will decode '+' as space * @param charset the charset to use * @return encoded string */ private static String decodeFormFields (final String content, final String charset) { if (content == null) { return null; } return urlDecode(content, charset != null ? 
Charset.forName(charset) : Consts.UTF_8, true); } /** * Decode/unescape www-url-form-encoded content. * * @param content the content to decode, will decode '+' as space * @param charset the charset to use * @return encoded string */ private static String decodeFormFields (final String content, final Charset charset) { if (content == null) { return null; } return urlDecode(content, charset != null ? charset : Consts.UTF_8, true); } /** * Encode/escape www-url-form-encoded content. *

* Uses the {@link #URLENCODER} set of characters, rather than * the {@link #UNRESERVED} set; this is for compatibilty with previous * releases, URLEncoder.encode() and most browsers. * * @param content the content to encode, will convert space to '+' * @param charset the charset to use * @return encoded string */ private static String encodeFormFields(final String content, final String charset) { if (content == null) { return null; } return urlEncode(content, charset != null ? Charset.forName(charset) : Consts.UTF_8, URLENCODER, true); } /** * Encode/escape www-url-form-encoded content. *

* Uses the {@link #URLENCODER} set of characters, rather than * the {@link #UNRESERVED} set; this is for compatibilty with previous * releases, URLEncoder.encode() and most browsers. * * @param content the content to encode, will convert space to '+' * @param charset the charset to use * @return encoded string */ private static String encodeFormFields (final String content, final Charset charset) { if (content == null) { return null; } return urlEncode(content, charset != null ? charset : Consts.UTF_8, URLENCODER, true); } /** * Encode a String using the {@link #USERINFO} set of characters. *

* Used by URIBuilder to encode the userinfo segment. * * @param content the string to encode, does not convert space to '+' * @param charset the charset to use * @return the encoded string */ static String encUserInfo(final String content, final Charset charset) { return urlEncode(content, charset, USERINFO, false); } /** * Encode a String using the {@link #URIC} set of characters. *

* Used by URIBuilder to encode the query and fragment segments. * * @param content the string to encode, does not convert space to '+' * @param charset the charset to use * @return the encoded string */ static String encUric(final String content, final Charset charset) { return urlEncode(content, charset, URIC, false); } /** * Encode a String using the {@link #PATH_SPECIAL} set of characters. *

* Used by URIBuilder to encode path segments. * * @param content the string to encode, does not convert space to '+' * @param charset the charset to use * @return the encoded string */ static String encPath(final String content, final Charset charset) { return urlEncode(content, charset, PATH_SPECIAL, false); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy