All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.codec.net.PercentCodec Maven / Gradle / Ivy

There is a newer version: 3.0.0-alpha-3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.nio.ByteBuffer;
import java.util.BitSet;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility, an array of
 * special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts
 * of the URI.
 * 

* This class is immutable. It is also thread-safe besides using BitSet which is not thread-safe, but its public * interface only call the access *

* * @see Percent-Encoding * @since 1.12 */ public class PercentCodec implements BinaryEncoder, BinaryDecoder { /** * The escape character used by the Percent-Encoding in order to introduce an encoded character. */ private static final byte ESCAPE_CHAR = '%'; /** * The bit set used to store the character that should be always encoded */ private final BitSet alwaysEncodeChars = new BitSet(); /** * The flag defining if the space character should be encoded as '+' */ private final boolean plusForSpace; /** * The minimum and maximum code of the bytes that is inserted in the bit set, used to prevent look-ups */ private int alwaysEncodeCharsMin = Integer.MAX_VALUE, alwaysEncodeCharsMax = Integer.MIN_VALUE; /** * Constructs a Percent coded that will encode all the non US-ASCII characters using the Percent-Encoding * while it will not encode all the US-ASCII characters, except for character '%' that is used as escape * character for Percent-Encoding. */ public PercentCodec() { this.plusForSpace = false; insertAlwaysEncodeChar(ESCAPE_CHAR); } /** * Constructs a Percent codec by specifying the characters that belong to US-ASCII that should * always be encoded. The rest US-ASCII characters will not be encoded, except for character '%' that * is used as escape character for Percent-Encoding. * * @param alwaysEncodeChars the unsafe characters that should always be encoded * @param plusForSpace the flag defining if the space character should be encoded as '+' */ public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) { this.plusForSpace = plusForSpace; insertAlwaysEncodeChars(alwaysEncodeChars); } private boolean canEncode(final byte c) { return !isAsciiChar(c) || inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c); } private boolean containsSpace(final byte[] bytes) { for (final byte b : bytes) { if (b == ' ') { return true; } } return false; } /** * Decodes bytes encoded with Percent-Encoding based on RFC 3986. The reverse process is performed in order to * decode the encoded characters to Unicode. */ @Override public byte[] decode(final byte[] bytes) throws DecoderException { if (bytes == null) { return null; } final ByteBuffer buffer = ByteBuffer.allocate(expectedDecodingBytes(bytes)); for (int i = 0; i < bytes.length; i++) { final byte b = bytes[i]; if (b == ESCAPE_CHAR) { try { final int u = Utils.digit16(bytes[++i]); final int l = Utils.digit16(bytes[++i]); buffer.put((byte) ((u << 4) + l)); } catch (final ArrayIndexOutOfBoundsException e) { throw new DecoderException("Invalid percent decoding: ", e); } } else if (plusForSpace && b == '+') { buffer.put((byte) ' '); } else { buffer.put(b); } } return buffer.array(); } /** * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding. * * @param obj the object to decode * @return the decoding result byte[] as Object * @throws DecoderException if the object is not a byte array */ @Override public Object decode(final Object obj) throws DecoderException { if (obj == null) { return null; } if (obj instanceof byte[]) { return decode((byte[]) obj); } throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent decoded"); } private byte[] doEncode(final byte[] bytes, final int expectedLength, final boolean willEncode) { final ByteBuffer buffer = ByteBuffer.allocate(expectedLength); for (final byte b : bytes) { if (willEncode && canEncode(b)) { byte bb = b; if (bb < 0) { bb = (byte) (256 + bb); } final char hex1 = Utils.hexDigit(bb >> 4); final char hex2 = Utils.hexDigit(bb); buffer.put(ESCAPE_CHAR); buffer.put((byte) hex1); buffer.put((byte) hex2); } else if (plusForSpace && b == ' ') { buffer.put((byte) '+'); } else { buffer.put(b); } } return buffer.array(); } /** * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the * US-ASCII characters that are configured to be always encoded. */ @Override public byte[] encode(final byte[] bytes) throws EncoderException { if (bytes == null) { return null; } final int expectedEncodingBytes = expectedEncodingBytes(bytes); final boolean willEncode = expectedEncodingBytes != bytes.length; if (willEncode || plusForSpace && containsSpace(bytes)) { return doEncode(bytes, expectedEncodingBytes, willEncode); } return bytes; } /** * Encodes an object into using the Percent-Encoding. Only byte[] objects are accepted. * * @param obj the object to encode * @return the encoding result byte[] as Object * @throws EncoderException if the object is not a byte array */ @Override public Object encode(final Object obj) throws EncoderException { if (obj == null) { return null; } if (obj instanceof byte[]) { return encode((byte[]) obj); } throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent encoded"); } private int expectedDecodingBytes(final byte[] bytes) { int byteCount = 0; for (int i = 0; i < bytes.length;) { final byte b = bytes[i]; i += b == ESCAPE_CHAR ? 3 : 1; byteCount++; } return byteCount; } private int expectedEncodingBytes(final byte[] bytes) { int byteCount = 0; for (final byte b : bytes) { byteCount += canEncode(b) ? 3 : 1; } return byteCount; } private boolean inAlwaysEncodeCharsRange(final byte c) { return c >= alwaysEncodeCharsMin && c <= alwaysEncodeCharsMax; } /** * Inserts a single character into a BitSet and maintains the min and max of the characters of the * {@code BitSet alwaysEncodeChars} in order to avoid look-ups when a byte is out of this range. * * @param b the byte that is candidate for min and max limit */ private void insertAlwaysEncodeChar(final byte b) { if (b < 0) { throw new IllegalArgumentException("byte must be >= 0"); } this.alwaysEncodeChars.set(b); if (b < alwaysEncodeCharsMin) { alwaysEncodeCharsMin = b; } if (b > alwaysEncodeCharsMax) { alwaysEncodeCharsMax = b; } } /** * Inserts the byte array into a BitSet for faster lookup. * * @param alwaysEncodeCharsArray */ private void insertAlwaysEncodeChars(final byte[] alwaysEncodeCharsArray) { if (alwaysEncodeCharsArray != null) { for (final byte b : alwaysEncodeCharsArray) { insertAlwaysEncodeChar(b); } } insertAlwaysEncodeChar(ESCAPE_CHAR); } private boolean isAsciiChar(final byte c) { return c >= 0; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy