All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.wink.common.internal.uri.UriEncoder Maven / Gradle / Ivy

There is a newer version: 1.4
Show newest version
/*******************************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *  
 *   http://www.apache.org/licenses/LICENSE-2.0
 *  
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 *  
 *******************************************************************************/

package org.apache.wink.common.internal.uri;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.List;

import javax.ws.rs.core.MultivaluedMap;

import org.apache.wink.common.internal.MultivaluedMapImpl;

/**
 * URI encoding and decoding helpers built on the character classes of RFC
 * 3986.
 * <p>
 * Encoding converts the input to UTF-8 and percent-escapes every byte that the
 * lookup table of the targeted URI component does not allow. Decoding turns
 * percent-escapes back into bytes and interprets those bytes as UTF-8.
 * <p>
 * All methods are static and return {@code null} for {@code null} string
 * input.
 */
public final class UriEncoder {

    /** Size of every lookup table: one slot per US-ASCII character. */
    private static final int       ASCII_TABLE_SIZE    = 128;

    private static final Charset   CHARSET_UTF_8       = Charset.forName("UTF-8"); //$NON-NLS-1$

    private UriEncoder() {
        // no instances
    }

    /** Upper-case hexadecimal digits used when writing a percent-escape. */
    private static final char[]    hexDigits           =
                                                           {'0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'             };

    /**
     * Maps a hexadecimal digit character (upper or lower case) to the ASCII
     * byte of its upper-case form. Slots of non-hex characters stay 0.
     */
    private static final byte[]    normalizedHexDigits = new byte[ASCII_TABLE_SIZE];

    /** {@code isHexDigit[c]} is true when ASCII character c is a hex digit. */
    private static final boolean[] isHexDigit          = new boolean[ASCII_TABLE_SIZE];

    /**
     * Unreserved characters according to RFC 3986. Each character below ASCII
     * 128 has a single array item that is true if the character is unreserved
     * and false otherwise.
     */
    public static final boolean[]  unreservedChars     = new boolean[ASCII_TABLE_SIZE];
    /** Characters left unescaped in user info: unreserved / sub-delims / ":". */
    public static final boolean[]  userInfoChars       = new boolean[ASCII_TABLE_SIZE];
    /** Characters left unescaped in a path segment: user info chars / "@". */
    public static final boolean[]  segmentChars        = new boolean[ASCII_TABLE_SIZE];
    /** Segment characters without "=" and ";" (matrix parameter name/value). */
    public static final boolean[]  matrixChars         = new boolean[ASCII_TABLE_SIZE];
    /** Characters left unescaped in a complete path: segment chars / "/". */
    public static final boolean[]  pathChars           = new boolean[ASCII_TABLE_SIZE];
    /** Characters left unescaped in a complete query: path chars / "?". */
    public static final boolean[]  queryChars          = new boolean[ASCII_TABLE_SIZE];
    /** Query characters without "&" and "=" (query parameter name/value). */
    public static final boolean[]  queryParamChars     = new boolean[ASCII_TABLE_SIZE];
    /** Characters left unescaped in a fragment: same set as the query set. */
    public static final boolean[]  fragmentChars       = new boolean[ASCII_TABLE_SIZE];
    /** Characters left unescaped in a whole URI: query chars / "#" / "[" / "]". */
    public static final boolean[]  uriChars            = new boolean[ASCII_TABLE_SIZE];
    /** Characters left unescaped in a URI template: uri chars / "{" / "}". */
    public static final boolean[]  uriTemplateChars    = new boolean[ASCII_TABLE_SIZE];

    static {
        // unreserved - ALPHA / DIGIT / "-" / "." / "_" / "~"
        // (freshly allocated boolean arrays are already all false)
        Arrays.fill(unreservedChars, 'a', 'z' + 1, true);
        Arrays.fill(unreservedChars, 'A', 'Z' + 1, true);
        Arrays.fill(unreservedChars, '0', '9' + 1, true);
        allow(unreservedChars, "-_.~"); //$NON-NLS-1$

        // sub delimiters - "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / ","
        // / ";" / "="
        // user info chars - *( unreserved / pct-encoded / sub-delims / ":" )
        copyTable(unreservedChars, userInfoChars);
        allow(userInfoChars, "!$&'()*+,;=:"); //$NON-NLS-1$

        // segment - *(unreserved / pct-encoded / sub-delims / ":" / "@")
        copyTable(userInfoChars, segmentChars);
        segmentChars['@'] = true;

        // matrix - same as segment but without "=" and ";"
        copyTable(segmentChars, matrixChars);
        matrixChars['='] = false;
        matrixChars[';'] = false;

        // path - *(unreserved / pct-encoded / sub-delims / ":" / "@" / "/")
        copyTable(segmentChars, pathChars);
        pathChars['/'] = true;

        // query - path characters plus "?"
        copyTable(pathChars, queryChars);
        queryChars['?'] = true;

        // fragment - identical to query
        copyTable(queryChars, fragmentChars);

        // query param - same as query but without "&" and "="
        copyTable(queryChars, queryParamChars);
        queryParamChars['&'] = false;
        queryParamChars['='] = false;

        // uri - query characters plus "#" / "[" / "]"
        copyTable(queryChars, uriChars);
        allow(uriChars, "#[]"); //$NON-NLS-1$

        // uri template - uri characters plus "{" / "}"
        copyTable(uriChars, uriTemplateChars);
        allow(uriTemplateChars, "{}"); //$NON-NLS-1$

        // fill the isHex array
        Arrays.fill(isHexDigit, '0', '9' + 1, true);
        Arrays.fill(isHexDigit, 'a', 'f' + 1, true);
        Arrays.fill(isHexDigit, 'A', 'F' + 1, true);

        // fill the normalizedHexDigits array: both cases of a digit map to the
        // upper-case form (decimal digits are their own lower case)
        for (int i = 0; i < hexDigits.length; ++i) {
            char upper = hexDigits[i];
            normalizedHexDigits[upper] = (byte)upper;
            normalizedHexDigits[Character.toLowerCase(upper)] = (byte)upper;
        }
    }

    /** Marks every character of <code>chars</code> as allowed in the table. */
    private static void allow(boolean[] table, String chars) {
        for (int i = 0; i < chars.length(); ++i) {
            table[chars.charAt(i)] = true;
        }
    }

    /** Copies all entries of <code>from</code> into <code>to</code>. */
    private static void copyTable(boolean[] from, boolean[] to) {
        System.arraycopy(from, 0, to, 0, from.length);
    }

    /**
     * Decode a single hexadecimal digit.
     * 
     * @param c the character to decode
     * @return the value 0-15 of the digit, or -1 if <code>c</code> is not a
     *         hexadecimal digit
     */
    private static int decodeHexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        } else if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        } else {
            // -1 lets the caller tell a malformed escape from the digit '0'
            return -1;
        }
    }

    /**
     * Encode all characters other than unreserved according to RFC 3986.
     * 
     * @param string string to encode
     * @return encoded US-ASCII string
     */
    public static String encodeString(String string) {
        return encode(string, false, unreservedChars, false);
    }

    /**
     * Encode user info according to RFC 3986.
     * 
     * @param userInfo the user info to encode
     * @param relax if true, then any sequence of chars in the input string that
     *            have the form '%XX', where XX are two HEX digits, will not be
     *            encoded
     * @return encoded user info string
     */
    public static String encodeUserInfo(String userInfo, boolean relax) {
        return encode(userInfo, relax, userInfoChars, false);
    }

    /**
     * Encode a path segment (without matrix parameters) according to RFC 3986.
     * 
     * @param segment the segment (without matrix parameters) to encode
     * @param relax if true, then any sequence of chars in the input string that
     *            have the form '%XX', where XX are two HEX digits, will not be
     *            encoded
     * @return encoded segment string
     */
    public static String encodePathSegment(String segment, boolean relax) {
        return encode(segment, relax, segmentChars, false);
    }

    /**
     * Encode a matrix parameter (name or value) according to RFC 3986.
     * 
     * @param matrix the matrix parameter (name or value) to encode
     * @param relax if true, then any sequence of chars in the input string that
     *            have the form '%XX', where XX are two HEX digits, will not be
     *            encoded
     * @return encoded matrix string
     */
    public static String encodeMatrix(String matrix, boolean relax) {
        return encode(matrix, relax, matrixChars, false);
    }

    /**
     * Encode a complete path string according to RFC 3986.
     * 
     * @param path the path string to encode
     * @param relax if true, then any sequence of chars in the input string that
     *            have the form '%XX', where XX are two HEX digits, will not be
     *            encoded
     * @return encoded path string
     */
    public static String encodePath(String path, boolean relax) {
        return encode(path, relax, pathChars, false);
    }

    /**
     * Encode a query parameter (name or value) according to RFC 3986. Unlike
     * the other encode methods, a space is written as '+' rather than "%20".
     * 
     * @param queryParam the query parameter string to encode
     * @param relax if true, then any sequence of chars in the input string that
     *            have the form '%XX', where XX are two HEX digits, will not be
     *            encoded
     * @return encoded query parameter string
     */
    public static String encodeQueryParam(String queryParam, boolean relax) {
        return encode(queryParam, relax, queryParamChars, true);
    }

    /**
     * Encode a complete query string according to RFC 3986.
     * 
     * @param query the query string to encode
     * @param relax if true, then any sequence of chars in the input string that
     *            have the form '%XX', where XX are two HEX digits, will not be
     *            encoded
     * @return encoded query string
     */
    public static String encodeQuery(String query, boolean relax) {
        return encode(query, relax, queryChars, false);
    }

    /**
     * Encode a fragment string according to RFC 3986.
     * 
     * @param fragment the fragment string to encode
     * @param relax if true, then any sequence of chars in the input string that
     *            have the form '%XX', where XX are two HEX digits, will not be
     *            encoded
     * @return encoded fragment string
     */
    public static String encodeFragment(String fragment, boolean relax) {
        return encode(fragment, relax, fragmentChars, false);
    }

    /**
     * Encode a uri according to RFC 3986, escaping every character that is not
     * allowed by {@link #uriChars}.
     * 
     * @param uri string to encode
     * @param relax if true, then any sequence of chars in the input of the form
     *            '%XX', where XX are two HEX digits, will not be encoded.
     * @return encoded US-ASCII string
     */
    public static String encodeUri(String uri, boolean relax) {
        return encode(uri, relax, uriChars, false);
    }

    /**
     * Encode a uri template according to RFC 3986. Same as
     * {@link #encodeUri(String, boolean)} except that '{' and '}' are left
     * unescaped.
     * 
     * @param uriTemplate template to encode
     * @param relax if true, then any sequence of chars in the input of the form
     *            '%XX', where XX are two HEX digits, will not be encoded.
     * @return encoded US-ASCII string
     */
    public static String encodeUriTemplate(String uriTemplate, boolean relax) {
        return encode(uriTemplate, relax, uriTemplateChars, false);
    }

    /**
     * Encode a string according to RFC 3986, escaping all characters where
     * <code>unreserved[char] == false</code>. The string is converted to
     * UTF-8 first, so a non-ASCII character becomes one escape per UTF-8 byte.
     * 
     * @param string string to encode; may be null
     * @param relax if true, then any sequence of chars in the input string that
     *            have the form '%XX', where XX are two HEX digits, will not be
     *            encoded.
     * @param unreserved an array of booleans that indicates which characters
     *            are considered unreserved. a character is considered
     *            unreserved if <code>unreserved[char] == true</code>, in which
     *            case it will not be encoded
     * @param spaceAsPlus if true, a space that is not unreserved is written as
     *            '+' instead of "%20"
     * @return encoded US-ASCII string, the input itself if nothing had to be
     *         encoded, or null if the input is null
     */
    private static String encode(String string,
                                 boolean relax,
                                 boolean[] unreserved,
                                 boolean spaceAsPlus) {
        if (string == null) {
            return null;
        }

        if (!needsEncoding(string, unreserved)) {
            return string;
        }

        // Encode to UTF-8
        ByteBuffer buffer = CHARSET_UTF_8.encode(string);
        // Prepare string buffer
        StringBuilder sb = new StringBuilder(buffer.remaining());
        // Now encode the characters
        while (buffer.hasRemaining()) {
            // bytes are signed: every byte of a non-ASCII character is negative
            int c = buffer.get();

            if (relax && (c == '%') && (buffer.remaining() >= 2)) {
                // peek at the next two bytes without consuming them; when they
                // are hex digits the existing escape is copied through as is
                int position = buffer.position();
                if (isHex(buffer.get(position)) && isHex(buffer.get(position + 1))) {
                    sb.append((char)c);
                    continue;
                }
            }

            if (c >= ' ' && unreserved[c]) {
                sb.append((char)c);
            } else if (spaceAsPlus && c == ' ') {
                sb.append('+');
            } else {
                sb.append('%');
                sb.append(hexDigits[(c & 0xf0) >> 4]);
                sb.append(hexDigits[c & 0xf]);
            }
        }

        return sb.toString();
    }

    /**
     * Tests whether a byte value is an ASCII hexadecimal digit.
     * 
     * @param c the value to test; may be negative, because it usually is a
     *            signed byte taken from a UTF-8 buffer
     * @return true if <code>c</code> is one of 0-9, a-f or A-F
     */
    private static boolean isHex(int c) {
        // the range check keeps non-ASCII (negative) bytes from indexing the
        // table out of bounds
        return c >= 0 && c < isHexDigit.length && isHexDigit[c];
    }

    /**
     * Determines if the input string contains any character that is not marked
     * as unreserved and therefore requires encoding. A '%' always counts as
     * requiring encoding, because it is in none of the tables.
     * 
     * @param s the string to test
     * @param unreserved table of the characters that may stay as they are
     * @return true if at least one character of the input string is outside
     *         the table or not marked in it; false if the string can be used
     *         unchanged
     */
    private static boolean needsEncoding(String s, boolean[] unreserved) {
        int len = s.length();
        for (int i = 0; i < len; ++i) {
            char c = s.charAt(i);
            // ">=" and not ">": index unreserved.length itself is out of range
            if (c >= unreserved.length || !unreserved[c]) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decode US-ASCII uri according to RFC 3986 and replaces all occurrences
     * of the '+' sign with spaces.
     * 
     * @param string query string to decode
     * @return decoded query
     */
    public static String decodeQuery(String string) {
        return decodeString(string, true, null);
    }

    /**
     * Decode US-ASCII uri according to RFC 3986.
     * 
     * @param string US-ASCII uri to decode
     * @return decoded uri
     */
    public static String decodeString(String string) {
        return decodeString(string, false, null);
    }

    /**
     * Decodes only the unreserved chars, according to RFC 3986 section
     * 6.2.2.2. Escapes of all other characters are kept, with their hex digits
     * converted to upper case.
     * 
     * @param string US-ASCII uri to decode
     * @return decoded uri
     */
    public static String normalize(String string) {
        return decodeString(string, false, unreservedChars);
    }

    /**
     * Decodes the percent-escapes of a string and interprets the decoded bytes
     * as UTF-8.
     * <p>
     * A '%' that is not followed by two hexadecimal digits is copied to the
     * output unchanged, and so are characters outside of US-ASCII.
     * 
     * @param string the string to decode; may be null
     * @param query if true, every literal '+' is turned into a space
     * @param decodeChars if not null, only escapes whose value is marked true
     *            in this table are decoded; all other escapes are kept and
     *            their hex digits are upper-cased. If null, every escape is
     *            decoded.
     * @return the decoded string, the input itself if it contains nothing to
     *         decode, or null if the input is null
     */
    private static String decodeString(String string, boolean query, boolean[] decodeChars) {
        if (string == null) {
            return null;
        }

        if (!needsDecoding(string, query)) {
            return string;
        }

        int len = string.length();
        StringBuilder result = new StringBuilder(len);
        // Bytes waiting to be decoded from UTF-8. Every input char adds at
        // most one byte, so len is always enough room.
        ByteBuffer pending = ByteBuffer.allocate(len);
        for (int i = 0; i < len; ++i) {
            char c = string.charAt(i);

            if (c >= ASCII_TABLE_SIZE) {
                // A non-ASCII char does not fit into one byte; narrowing it
                // would corrupt it. Emit what was collected so far and copy
                // the char itself.
                flushPending(pending, result);
                result.append(c);
                continue;
            }

            if (c == '%' && (i + 2 < len)) {
                char h1 = string.charAt(i + 1);
                char h2 = string.charAt(i + 2);
                int d1 = decodeHexDigit(h1);
                int d2 = decodeHexDigit(h2);
                if (d1 >= 0 && d2 >= 0) {
                    int v = d1 << 4 | d2;
                    if (decodeChars != null && (v >= decodeChars.length || !decodeChars[v])) {
                        // keep the escape, normalizing its digits to upper
                        // case; h1 and h2 are valid hex digits here, so both
                        // are safe table indexes
                        pending.put((byte)c);
                        pending.put(normalizedHexDigits[h1]);
                        pending.put(normalizedHexDigits[h2]);
                    } else {
                        pending.put((byte)v);
                    }
                    i += 2;
                } else {
                    // malformed escape: keep the '%', the following chars are
                    // handled by the next iterations
                    pending.put((byte)c);
                }
            } else {
                if (query && c == '+') {
                    c = ' ';
                }
                pending.put((byte)c);
            }
        }
        flushPending(pending, result);
        return result.toString();
    }

    /**
     * Decodes the bytes collected in <code>pending</code> from UTF-8, appends
     * the resulting characters to <code>target</code> and empties the buffer
     * so that it can be filled again.
     */
    private static void flushPending(ByteBuffer pending, StringBuilder target) {
        if (pending.position() == 0) {
            return;
        }
        pending.flip();
        target.append(CHARSET_UTF_8.decode(pending));
        pending.clear();
    }

    /**
     * Determines if the input string contains anything the decoder would
     * change.
     * 
     * @param s the string to test
     * @param query if true, a '+' also counts as something to decode
     * @return true if the string contains a '%', or a '+' while
     *         <code>query</code> is true
     */
    private static boolean needsDecoding(String s, boolean query) {
        boolean needs = s.indexOf('%') != -1;
        if (!needs && query) {
            needs = s.indexOf('+') != -1;
        }
        return needs;
    }

    /**
     * Creates a copy of a map in which all values are decoded with
     * {@link #decodeString(String)}; the keys are copied unchanged.
     * 
     * @param map the map with the encoded values; is not modified
     * @return a new map with the same keys and the decoded values
     */
    public static MultivaluedMap<String, String> decodeMultivaluedMapValues(MultivaluedMap<String, String> map) {
        return decodeMultivaluedMap(map, false);
    }

    /**
     * Creates a copy of a map in which all values, and optionally all keys,
     * are decoded with {@link #decodeString(String)}.
     * 
     * @param map the map with the encoded entries; is not modified
     * @param decodeKeys if true the keys are decoded as well, otherwise they
     *            are copied unchanged
     * @return a new map with the decoded entries
     */
    public static MultivaluedMap<String, String> decodeMultivaluedMap(MultivaluedMap<String, String> map,
                                                                      boolean decodeKeys) {
        MultivaluedMap<String, String> result = new MultivaluedMapImpl<String, String>();
        for (String key : map.keySet()) {
            List<String> values = map.get(key);
            String targetKey = decodeKeys ? decodeString(key) : key;
            for (String value : values) {
                result.add(targetKey, decodeString(value));
            }
        }
        return result;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy