de.schlichtherle.truezip.util.UriDecoder Maven / Gradle / Ivy

Go to download
/*
 * Copyright (C) 2005-2015 Schlichtherle IT Services.
 * All rights reserved. Use is subject to license terms.
 */
package de.schlichtherle.truezip.util;

import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import static java.nio.charset.CoderResult.OVERFLOW;
import static java.nio.charset.CoderResult.UNDERFLOW;
import javax.annotation.CheckForNull;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * Decodes quoted characters in URI components according to
 * RFC 2396
 * and its updates in
 * RFC 2732
 * for IPv6 addresses.
 *
 * @see 
 *      RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax
 * @see 
 *      RFC 2732: Format for Literal IPv6 Addresses in URL's
 * @see UriEncoder
 * @author Christian Schlichtherle
 */
@NotThreadSafe
public final class UriDecoder {

    private static final Charset UTF8 = Charset.forName("UTF-8");

    private final CharsetDecoder decoder;

    private @CheckForNull StringBuilder stringBuilder;

    /**
     * Constructs a new URI decoder which uses the UTF-8 character set to
     * decode non-US-ASCII characters.
     */
    public UriDecoder() {
        this(null);
    }

    /**
     * Constructs a new URI decoder which uses the given character set to
     * decode non-US-ASCII characters.
     * 
     * @param charset the character set to use for encoding non-US-ASCII
     *        characters.
     *        If this parameter is {@code null}, then it defaults to
     *        {@code UTF-8}.
     *        Note that providing any other value than {@code null} or
     *        {@code UTF-8} will void interoperability with most applications.
     */
    public UriDecoder(@CheckForNull Charset charset) {
        if (null == charset)
            charset = UTF8;
        this.decoder = charset.newDecoder();
    }

    private static int dequote(final CharBuffer eC) {
        if (eC.hasRemaining()) {
            final char ec0 = eC.get();
            if (eC.hasRemaining()) {
                final char ec1 = eC.get();
                return (dequote(ec0) << 4) | dequote(ec1);
            }
        }
        return -1;
    }

    private static int dequote(char ec) {
	if ('0' <= ec && ec <= '9')
	    return ec - '0';
        ec &= ~(2 << 4); // toUpperCase for 'a' to 'z'
	if ('A' <= ec && ec <= 'F')
	    return ec - 'A' + 10;
	return -1;
    }

    /**
     * Decodes all escape sequences in the string {@code eS}, that is,
     * each occurence of "%XX", where X is a hexadecimal digit,
     * gets substituted with the corresponding single byte and the resulting
     * string gets decoded using the character set provided to the constructor.
     * 
     * @param  eS the encoded string to decode.
     * @return The decoded string.
     * @throws IllegalArgumentException on any decoding error with a
     *         {@link URISyntaxException} as its
     *         {@link IllegalArgumentException#getCause() cause}.
     */
    public String decode(String eS) {
        try {
            StringBuilder dS = decode(eS, null);
            return null != dS ? dS.toString() : eS;
        } catch (URISyntaxException ex) {
            throw new IllegalArgumentException(ex);
        }
    }

    /**
     * Decodes all escape sequences in the string {@code eS}, that is,
     * each occurence of "%XX", where X is a hexadecimal digit,
     * gets substituted with the corresponding single byte and the resulting
     * string gets decoded to the string builder {@code dS} using the character
     * set provided to the constructor.
     * 
     * @param  eS the encoded string to decode.
     * @param  dS the string builder to which all decoded characters shall get
     *         appended.
     * @return If {@code eS} contains no escape sequences, then {@code null}
     *         gets returned.
     *         Otherwise, if {@code dS} is not {@code null}, then it gets
     *         returned with all decoded characters appended to it.
     *         Otherwise, a temporary string builder gets returned which solely
     *         contains all decoded characters.
     *         This temporary string builder may get cleared and reused upon
     *         the next call to any method of this object.
     * @throws URISyntaxException on any decoding error.
     *         This exception will leave {@code eS} in an undefined state.
     */
    public @CheckForNull StringBuilder decode(
            final String eS,
            @CheckForNull StringBuilder dS)
    throws URISyntaxException {
        final CharBuffer eC = CharBuffer.wrap(eS);  // encoded characters
        ByteBuffer eB = null;                       // encoded bytes
        CharBuffer dC = null;                       // decoded characters
        CharsetDecoder dec = null;
        while (true) {
            eC.mark();
            final int ec = eC.hasRemaining() ? eC.get() : -1; // char is unsigned!
            if ('%' == ec) {
                if (null == eB) {
                    if (null == dS) {
                        if (null == (dS = stringBuilder))
                            dS = stringBuilder = new StringBuilder();
                        else
                            dS.setLength(0);
                        dS.append(eS, 0, eC.position() - 1); // prefix until current character
                    }
                    int l = eC.remaining();
                    l = (l + 1) / 3;
                    eB = ByteBuffer.allocate(l);
                    dC = CharBuffer.allocate(l);
                    dec = decoder;
                }
                final int eb = dequote(eC);         // encoded byte
                if (eb < 0)
                    throw new URISyntaxException(
                            eS,
                            "illegal escape sequence",
                            eC.reset().position());
                eB.put((byte) eb);
            } else {
                if (null != eB && 0 < eB.position()) {
                    eB.flip();
                    { // Decode eB -> dC.
                        CoderResult cr;
                        if (UNDERFLOW != (cr = dec.reset().decode(eB, dC, true))
                                || UNDERFLOW != (cr = dec.flush(dC))) {
                            assert OVERFLOW != cr;
                            throw new QuotedUriSyntaxException(eS, cr.toString());
                        }
                    }
                    eB.clear();
                    dC.flip();
                    dS.append(dC);
                    dC.clear();
                }
                if (0 > ec)
                    break;
                if (null != dS)
                    dS.append((char) ec);
            }
        }
        return null == eB ? null : dS;
    }
}