All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.palominolabs.http.url.UrlPercentEncoders Maven / Gradle / Ivy

There is a newer version: 1.1.5
Show newest version
/*
 * Copyright (c) 2012 Palomino Labs, Inc.
 */

package com.palominolabs.http.url;

import java.util.BitSet;

import static com.google.common.base.Charsets.UTF_8;

/**
 * Factory for {@link PercentEncoder}s, one per URL component (reg-name, path segment, matrix parameter, query
 * parameter, fragment). Each encoder is handed a {@link BitSet} of the code points that are considered safe to leave
 * as-is in that component, together with UTF-8 as the charset.
 *
 * See RFC 3986, RFC 1738 and http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding.
 */
final class UrlPercentEncoders {

    /**
     * Punctuation that is part of RFC 3986 'unreserved' (letters and digits are added as ranges).
     */
    private static final String UNRESERVED_PUNCTUATION = "-._~";

    /**
     * RFC 3986 'sub-delims'.
     */
    private static final String SUB_DELIMS = "!$&'()*+,;=";

    /**
     * Safe code points for an RFC 3986 'reg-name'.
     */
    private static final BitSet REG_NAME_BIT_SET = regNameChars();

    /**
     * Safe code points for a path segment.
     */
    private static final BitSet PATH_BIT_SET = pathChars();

    /**
     * Safe code points for a matrix parameter name or value.
     */
    private static final BitSet MATRIX_BIT_SET = matrixChars();

    /**
     * Safe code points for a query parameter name or value.
     */
    private static final BitSet QUERY_BIT_SET = queryChars();

    /**
     * Safe code points for a fragment.
     */
    private static final BitSet FRAGMENT_BIT_SET = fragmentChars();

    private UrlPercentEncoders() {
        // static factory methods only
    }

    static PercentEncoder getRegNameEncoder() {
        return new PercentEncoder(REG_NAME_BIT_SET, UTF_8);
    }

    static PercentEncoder getPathEncoder() {
        return new PercentEncoder(PATH_BIT_SET, UTF_8);
    }

    static PercentEncoder getMatrixEncoder() {
        return new PercentEncoder(MATRIX_BIT_SET, UTF_8);
    }

    static PercentEncoder getQueryEncoder() {
        return new PercentEncoder(QUERY_BIT_SET, UTF_8);
    }

    static PercentEncoder getFragmentEncoder() {
        return new PercentEncoder(FRAGMENT_BIT_SET, UTF_8);
    }

    /**
     * RFC 3986 'reg-name': unreserved plus sub-delims. This is not very aggressive... it's quite possible to have
     * DNS-illegal names out of this. Regardless, it will at least be URI-compliant even if it's not HTTP
     * URL-compliant.
     *
     * @return a new bit set of reg-name code points
     */
    private static BitSet regNameChars() {
        BitSet bits = new BitSet();
        addUnreserved(bits);
        addSubdelims(bits);
        return bits;
    }

    /**
     * RFC 3986 'pchar' without ';', the delimiter that starts the matrix section.
     *
     * @return a new bit set of path segment code points
     */
    private static BitSet pathChars() {
        BitSet bits = new BitSet();
        addPChar(bits);
        clearEach(bits, ";");
        return bits;
    }

    /**
     * RFC 3986 'pchar' without the delimiters used by HTTP matrix params as per RFC 1738 S3.3. The other reserved
     * chars ('/' and '?') are not part of 'pchar' to begin with.
     *
     * @return a new bit set of matrix parameter code points
     */
    private static BitSet matrixChars() {
        BitSet bits = new BitSet();
        addPChar(bits);
        clearEach(bits, ";=");
        return bits;
    }

    /**
     * RFC 3986 'query' without the delimiters used by HTTP queries ('=' and '&amp;').
     * http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 also specifies that "+" can mean space in a query,
     * so '+' is removed as well to say that it is not safe to leave as-is.
     *
     * @return a new bit set of query parameter code points
     */
    private static BitSet queryChars() {
        BitSet bits = new BitSet();
        addQuery(bits);
        clearEach(bits, "=&+");
        return bits;
    }

    /**
     * RFC 3986 'fragment', unmodified.
     *
     * @return a new bit set of fragment code points
     */
    private static BitSet fragmentChars() {
        BitSet bits = new BitSet();
        addFragment(bits);
        return bits;
    }

    /**
     * Add code points for 'fragment' chars: 'pchar' plus '/' and '?'.
     *
     * @param fragmentBitSet bit set
     */
    private static void addFragment(BitSet fragmentBitSet) {
        addPChar(fragmentBitSet);
        setEach(fragmentBitSet, "/?");
    }

    /**
     * Add code points for 'query' chars: 'pchar' plus '/' and '?'.
     *
     * @param queryBitSet bit set
     */
    private static void addQuery(BitSet queryBitSet) {
        addPChar(queryBitSet);
        setEach(queryBitSet, "/?");
    }

    /**
     * Add code points for 'pchar' chars: 'unreserved', 'sub-delims', ':' and '@'.
     *
     * @param bs bitset
     */
    private static void addPChar(BitSet bs) {
        addUnreserved(bs);
        addSubdelims(bs);
        setEach(bs, ":@");
    }

    /**
     * Add codepoints for 'unreserved' chars: ASCII letters, digits, '-', '.', '_' and '~'.
     *
     * @param bs bitset to add codepoints to
     */
    private static void addUnreserved(BitSet bs) {
        // the upper bound of BitSet.set(from, to) is exclusive, hence the + 1
        bs.set('a', 'z' + 1);
        bs.set('A', 'Z' + 1);
        bs.set('0', '9' + 1);
        setEach(bs, UNRESERVED_PUNCTUATION);
    }

    /**
     * Add codepoints for 'sub-delims' chars
     *
     * @param bs bitset to add codepoints to
     */
    private static void addSubdelims(BitSet bs) {
        setEach(bs, SUB_DELIMS);
    }

    /**
     * Set the bit for every char in the given string.
     *
     * @param bs    bitset to add codepoints to
     * @param chars chars whose bits should be set
     */
    private static void setEach(BitSet bs, String chars) {
        for (int i = 0; i < chars.length(); i++) {
            bs.set(chars.charAt(i));
        }
    }

    /**
     * Clear the bit for every char in the given string.
     *
     * @param bs    bitset to remove codepoints from
     * @param chars chars whose bits should be cleared
     */
    private static void clearEach(BitSet bs, String chars) {
        for (int i = 0; i < chars.length(); i++) {
            bs.clear(chars.charAt(i));
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy