org.apache.commons.codec.net.PercentCodec Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-codec Show documentation
Show all versions of commons-codec Show documentation
The Apache Commons Codec package contains simple encoders and decoders for
various formats such as Base64 and Hexadecimal. In addition to these
widely used encoders and decoders, the codec package also maintains a
collection of phonetic encoding utilities.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.net;
import java.nio.ByteBuffer;
import java.util.BitSet;
import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
/**
 * Implements the Percent-Encoding scheme, as described in RFC 3986. For extensibility, an array of
 * special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts
 * of the URI.
 * <p>
 * This class is immutable: every field is {@code final}, and the internal {@link BitSet} (which is not thread-safe
 * by itself) is only written while the instance is being constructed and only read afterwards.
 * </p>
 * <p>
 * Note that when {@code plusForSpace} is enabled, a literal {@code '+'} in the input is only escaped if the caller
 * listed it in the always-encode characters; otherwise it is written unchanged and {@link #decode(byte[])} will
 * turn it into a space.
 * </p>
 *
 * @see <a href="https://en.wikipedia.org/wiki/Percent-encoding">Percent-Encoding</a>
 * @since 1.12
 */
public class PercentCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * The escape character used by the Percent-Encoding in order to introduce an encoded character.
     */
    private static final byte ESCAPE_CHAR = '%';

    /**
     * The bit set used to store the characters that should always be encoded. It is populated by the
     * constructor only and never modified afterwards.
     */
    private final BitSet alwaysEncodeChars = new BitSet();

    /**
     * The flag defining if the space character should be encoded as '+'.
     */
    private final boolean plusForSpace;

    /**
     * The lowest byte value stored in {@link #alwaysEncodeChars}, used to skip bit set look-ups.
     */
    private final int alwaysEncodeCharsMin;

    /**
     * The highest byte value stored in {@link #alwaysEncodeChars}, used to skip bit set look-ups.
     */
    private final int alwaysEncodeCharsMax;

    /**
     * Constructs a Percent codec that will encode all the non US-ASCII characters using the Percent-Encoding
     * while it will not encode any of the US-ASCII characters, except for character '%' that is used as escape
     * character for Percent-Encoding. Spaces are not replaced by '+'.
     */
    public PercentCodec() {
        this(null, false);
    }

    /**
     * Constructs a Percent codec by specifying the characters that belong to US-ASCII that should
     * always be encoded. The remaining US-ASCII characters will not be encoded, except for character '%' that
     * is used as escape character for Percent-Encoding.
     *
     * @param alwaysEncodeChars the unsafe characters that should always be encoded; may be {@code null}
     * @param plusForSpace the flag defining if the space character should be encoded as '+'
     * @throws IllegalArgumentException if {@code alwaysEncodeChars} contains a negative (non US-ASCII) byte
     */
    public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) {
        this.plusForSpace = plusForSpace;
        insertAlwaysEncodeChars(alwaysEncodeChars);
        // The escape character is always inserted, so the bit set is never empty here and both
        // bounds are well defined.
        this.alwaysEncodeCharsMin = this.alwaysEncodeChars.nextSetBit(0);
        this.alwaysEncodeCharsMax = this.alwaysEncodeChars.length() - 1;
    }

    /**
     * Adds the given bytes, followed by the escape character, into the bit set for faster lookup.
     *
     * @param alwaysEncodeCharsArray the bytes to register as always-encoded; {@code null} registers only
     *        the escape character
     */
    private void insertAlwaysEncodeChars(final byte[] alwaysEncodeCharsArray) {
        if (alwaysEncodeCharsArray != null) {
            for (final byte b : alwaysEncodeCharsArray) {
                insertAlwaysEncodeChar(b);
            }
        }
        insertAlwaysEncodeChar(ESCAPE_CHAR);
    }

    /**
     * Registers a single character in the {@code BitSet alwaysEncodeChars}.
     *
     * @param b the byte to register; must not be negative
     * @throws IllegalArgumentException if {@code b} is negative
     */
    private void insertAlwaysEncodeChar(final byte b) {
        if (b < 0) {
            // A negative index would make BitSet.set fail with an opaque IndexOutOfBoundsException.
            throw new IllegalArgumentException("byte must be >= 0");
        }
        this.alwaysEncodeChars.set(b);
    }

    /**
     * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the
     * US-ASCII characters that are configured to be always encoded.
     *
     * @param bytes the bytes to encode; may be {@code null}
     * @return {@code null} for {@code null} input; the input array itself (not a copy) when no byte needs to
     *         change; otherwise a new array holding the encoded bytes
     * @throws EncoderException declared by the interface; this implementation does not throw it
     */
    @Override
    public byte[] encode(final byte[] bytes) throws EncoderException {
        if (bytes == null) {
            return null;
        }
        final int expectedEncodingBytes = expectedEncodingBytes(bytes);
        final boolean willEncode = expectedEncodingBytes != bytes.length;
        if (willEncode || (plusForSpace && containsSpace(bytes))) {
            return doEncode(bytes, expectedEncodingBytes, willEncode);
        }
        return bytes;
    }

    /**
     * Writes the encoded form of {@code bytes} into a freshly allocated array.
     *
     * @param bytes the bytes to encode
     * @param expectedLength the exact length of the encoded output
     * @param willEncode whether at least one byte has to be escaped
     * @return the encoded bytes
     */
    private byte[] doEncode(final byte[] bytes, final int expectedLength, final boolean willEncode) {
        final ByteBuffer buffer = ByteBuffer.allocate(expectedLength);
        for (final byte b : bytes) {
            if (willEncode && canEncode(b)) {
                // Work on the unsigned value so that each nibble is extracted without sign extension.
                final int unsigned = b & 0xFF;
                final char hex1 = Utils.hexDigit(unsigned >> 4);
                final char hex2 = Utils.hexDigit(unsigned & 0x0F);
                buffer.put(ESCAPE_CHAR);
                buffer.put((byte) hex1);
                buffer.put((byte) hex2);
            } else if (plusForSpace && b == ' ') {
                buffer.put((byte) '+');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Computes the length of the encoded output: three bytes for every escaped byte, one for every other byte.
     *
     * @param bytes the bytes that are about to be encoded
     * @return the number of bytes the encoded form occupies
     */
    private int expectedEncodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (final byte b : bytes) {
            byteCount += canEncode(b) ? 3 : 1;
        }
        return byteCount;
    }

    /**
     * Tells whether the given bytes contain at least one space character.
     *
     * @param bytes the bytes to scan
     * @return {@code true} if a space is present
     */
    private boolean containsSpace(final byte[] bytes) {
        for (final byte b : bytes) {
            if (b == ' ') {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the given byte has to be escaped: either it is not US-ASCII or it was registered as
     * always-encoded.
     *
     * @param c the byte to test
     * @return {@code true} if the byte must be written as an escape sequence
     */
    private boolean canEncode(final byte c) {
        return !isAsciiChar(c) || (inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c));
    }

    /**
     * Tells whether the given byte lies between the lowest and highest registered always-encode bytes.
     *
     * @param c the byte to test
     * @return {@code true} if a bit set look-up is worthwhile
     */
    private boolean inAlwaysEncodeCharsRange(final byte c) {
        return c >= alwaysEncodeCharsMin && c <= alwaysEncodeCharsMax;
    }

    /**
     * Tells whether the given byte is a US-ASCII character, i.e. is not negative.
     *
     * @param c the byte to test
     * @return {@code true} if the byte is in the US-ASCII range
     */
    private boolean isAsciiChar(final byte c) {
        return c >= 0;
    }

    /**
     * Decodes bytes encoded with Percent-Encoding based on RFC 3986. Every escape sequence ('%' followed by two
     * hexadecimal digits) is replaced by the byte it denotes; if {@code plusForSpace} is set, '+' is replaced
     * by a space. All other bytes are copied unchanged.
     *
     * @param bytes the bytes to decode; may be {@code null}
     * @return {@code null} for {@code null} input, otherwise a new array holding the decoded bytes
     * @throws DecoderException if an escape character is not followed by two more bytes
     */
    @Override
    public byte[] decode(final byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }
        final ByteBuffer buffer = ByteBuffer.allocate(expectedDecodingBytes(bytes));
        for (int i = 0; i < bytes.length; i++) {
            final byte b = bytes[i];
            if (b == ESCAPE_CHAR) {
                // Check the bounds explicitly instead of relying on ArrayIndexOutOfBoundsException.
                if (bytes.length - i < 3) {
                    throw new DecoderException(
                            "Invalid percent decoding: incomplete escape sequence at index " + i);
                }
                // NOTE(review): Utils.digit16 is presumed to reject non-hexadecimal digits - confirm.
                final int u = Utils.digit16(bytes[++i]);
                final int l = Utils.digit16(bytes[++i]);
                buffer.put((byte) ((u << 4) + l));
            } else if (plusForSpace && b == '+') {
                buffer.put((byte) ' ');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Computes the length of the decoded output: every escape sequence (three bytes) and every other byte
     * yields exactly one decoded byte.
     *
     * @param bytes the bytes that are about to be decoded
     * @return the number of bytes the decoded form occupies
     */
    private int expectedDecodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (int i = 0; i < bytes.length; ) {
            final byte b = bytes[i];
            i += b == ESCAPE_CHAR ? 3 : 1;
            byteCount++;
        }
        return byteCount;
    }

    /**
     * Encodes an object using the Percent-Encoding. Only byte[] objects are accepted.
     *
     * @param obj the object to encode
     * @return the encoding result byte[] as Object, or {@code null} if {@code obj} is {@code null}
     * @throws EncoderException if the object is not a byte array
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return encode((byte[]) obj);
        } else {
            throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent encoded");
        }
    }

    /**
     * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
     *
     * @param obj the object to decode
     * @return the decoding result byte[] as Object, or {@code null} if {@code obj} is {@code null}
     * @throws DecoderException if the object is not a byte array
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        } else {
            throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent decoded");
        }
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy