All downloads are free. The search and download functionality uses the official Maven repository.

org.xbib.net.PercentEncoders Maven / Gradle / Ivy

package org.xbib.net;

import java.nio.charset.Charset;
import java.util.BitSet;

import static java.nio.charset.CodingErrorAction.REPORT;

/**
 * Static factory for {@link PercentEncoder} instances, one per URI component. Each encoder is handed the
 * set of characters that are considered safe to leave as-is in that component.
 *
 * See RFC 3986, RFC 1738 and http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding.
 */
public class PercentEncoders {

    /** RFC 3986 'unreserved' punctuation (letters and digits are added as ranges). */
    private static final String UNRESERVED_MARKS = "-._~";

    /** RFC 3986 'sub-delims'. */
    private static final String SUB_DELIMS = "!$&'()*+,;=";

    /** Safe characters for the minimal encoding: RFC 3986 'unreserved' only. */
    private static final BitSet UNRESERVED_BIT_SET = new BitSet();

    /** Safe characters for RFC 3986 reg-names. */
    private static final BitSet REG_NAME_BIT_SET = new BitSet();

    /** Safe characters for a path segment. */
    private static final BitSet PATH_BIT_SET = new BitSet();

    /** Safe characters for a matrix parameter. */
    private static final BitSet MATRIX_BIT_SET = new BitSet();

    /** Safe characters for a whole query string. */
    private static final BitSet QUERY_BIT_SET = new BitSet();

    /** Safe characters for a single query parameter key or value. */
    private static final BitSet QUERY_PARAM_BIT_SET = new BitSet();

    /** Safe characters for a fragment. */
    private static final BitSet FRAGMENT_BIT_SET = new BitSet();

    static {
        // Minimal encoding, as needed for RFC 6570 URI templates.
        addUnreserved(UNRESERVED_BIT_SET);

        // RFC 3986 'reg-name'. This is deliberately permissive: the result may not be a legal DNS name,
        // but it is URI-compliant even when it is not HTTP-URL-compliant.
        addUnreserved(REG_NAME_BIT_SET);
        addSubdelims(REG_NAME_BIT_SET);

        // RFC 3986 'pchar' without ';', because ';' starts the matrix section.
        addPChar(PATH_BIT_SET);
        clearAll(PATH_BIT_SET, ";");

        // Matrix params (RFC 1738 S3.3): same as the path set, with the key/value delimiter '=' removed
        // as well. The other reserved chars ('/' and '?') are not part of 'pchar' in the first place.
        MATRIX_BIT_SET.or(PATH_BIT_SET);
        clearAll(MATRIX_BIT_SET, "=");

        // RFC 3986 'query'. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 says that '+' can
        // mean space in a query, so '+' is not safe to leave as-is.
        addPCharSlashQuestion(QUERY_BIT_SET);
        clearAll(QUERY_BIT_SET, "+");

        // HTML4 query params are stricter: the pair delimiters must be encoded too, so that key=value
        // pairs can be used.
        QUERY_PARAM_BIT_SET.or(QUERY_BIT_SET);
        clearAll(QUERY_PARAM_BIT_SET, "=&");

        // RFC 3986 'fragment'.
        addPCharSlashQuestion(FRAGMENT_BIT_SET);
    }

    private PercentEncoders() {
    }

    /**
     * Creates an encoder that uses the 'unreserved' character set.
     *
     * @param charset charset used to obtain the character encoder
     * @return a new encoder
     */
    public static PercentEncoder getUnreservedEncoder(Charset charset) {
        return newEncoder(UNRESERVED_BIT_SET, charset);
    }

    /**
     * Creates an encoder for cookies; it uses the same 'unreserved' character set as
     * {@link #getUnreservedEncoder(Charset)}.
     *
     * @param charset charset used to obtain the character encoder
     * @return a new encoder
     */
    public static PercentEncoder getCookieEncoder(Charset charset) {
        return newEncoder(UNRESERVED_BIT_SET, charset);
    }

    /**
     * Creates an encoder that uses the reg-name character set.
     *
     * @param charset charset used to obtain the character encoder
     * @return a new encoder
     */
    public static PercentEncoder getRegNameEncoder(Charset charset) {
        return newEncoder(REG_NAME_BIT_SET, charset);
    }

    /**
     * Creates an encoder that uses the path character set.
     *
     * @param charset charset used to obtain the character encoder
     * @return a new encoder
     */
    public static PercentEncoder getPathEncoder(Charset charset) {
        return newEncoder(PATH_BIT_SET, charset);
    }

    /**
     * Creates an encoder that uses the matrix parameter character set.
     *
     * @param charset charset used to obtain the character encoder
     * @return a new encoder
     */
    public static PercentEncoder getMatrixEncoder(Charset charset) {
        return newEncoder(MATRIX_BIT_SET, charset);
    }

    /**
     * Creates an encoder that uses the query character set.
     *
     * @param charset charset used to obtain the character encoder
     * @return a new encoder
     */
    public static PercentEncoder getQueryEncoder(Charset charset) {
        return newEncoder(QUERY_BIT_SET, charset);
    }

    /**
     * Creates an encoder that uses the query parameter character set.
     *
     * @param charset charset used to obtain the character encoder
     * @return a new encoder
     */
    public static PercentEncoder getQueryParamEncoder(Charset charset) {
        return newEncoder(QUERY_PARAM_BIT_SET, charset);
    }

    /**
     * Creates an encoder that uses the fragment character set.
     *
     * @param charset charset used to obtain the character encoder
     * @return a new encoder
     */
    public static PercentEncoder getFragmentEncoder(Charset charset) {
        return newEncoder(FRAGMENT_BIT_SET, charset);
    }

    /**
     * Builds a {@link PercentEncoder} over the given character set, with a fresh charset encoder that
     * is configured to report both malformed input and unmappable characters.
     *
     * @param safeChars bit set handed to the encoder
     * @param charset   charset used to obtain the character encoder
     * @return a new encoder
     */
    private static PercentEncoder newEncoder(BitSet safeChars, Charset charset) {
        return new PercentEncoder(safeChars,
                charset.newEncoder().onMalformedInput(REPORT).onUnmappableCharacter(REPORT));
    }

    /**
     * Add code points for 'pchar' plus '/' and '?'. In RFC 3986 this is the production shared by
     * 'query' and 'fragment'.
     *
     * @param bs bit set to add code points to
     */
    private static void addPCharSlashQuestion(BitSet bs) {
        addPChar(bs);
        setAll(bs, "/?");
    }

    /**
     * Add code points for 'pchar' chars: unreserved, sub-delims, ':' and '@'.
     *
     * @param bs bit set to add code points to
     */
    private static void addPChar(BitSet bs) {
        addUnreserved(bs);
        addSubdelims(bs);
        setAll(bs, ":@");
    }

    /**
     * Add code points for 'unreserved' chars: ASCII letters, digits, '-', '.', '_' and '~'.
     *
     * @param bs bit set to add code points to
     */
    private static void addUnreserved(BitSet bs) {
        // BitSet.set(from, to) excludes the upper bound, hence the + 1.
        bs.set('a', 'z' + 1);
        bs.set('A', 'Z' + 1);
        bs.set('0', '9' + 1);
        setAll(bs, UNRESERVED_MARKS);
    }

    /**
     * Add code points for 'sub-delims' chars.
     *
     * @param bs bit set to add code points to
     */
    private static void addSubdelims(BitSet bs) {
        setAll(bs, SUB_DELIMS);
    }

    /**
     * Sets the bit of every character in {@code chars}.
     *
     * @param bs    bit set to modify
     * @param chars characters whose bits are set
     */
    private static void setAll(BitSet bs, String chars) {
        for (int i = 0; i < chars.length(); i++) {
            bs.set(chars.charAt(i));
        }
    }

    /**
     * Clears the bit of every character in {@code chars}.
     *
     * @param bs    bit set to modify
     * @param chars characters whose bits are cleared
     */
    private static void clearAll(BitSet bs, String chars) {
        for (int i = 0; i < chars.length(); i++) {
            bs.clear(chars.charAt(i));
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy