com.helger.commons.codec.URLCodec Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ph-commons Show documentation
Show all versions of ph-commons Show documentation
Java 1.8+ Library with tons of utility classes required in all projects
/*
* Copyright (C) 2014-2024 Philip Helger (www.helger.com)
* philip[at]helger[dot]com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.helger.commons.codec;
import java.io.IOException;
import java.io.OutputStream;
import java.util.BitSet;
import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.WillNotClose;
import javax.annotation.concurrent.ThreadSafe;
import com.helger.commons.annotation.ReturnsMutableCopy;
import com.helger.commons.string.StringHelper;
/**
 * Encoder and decoder for URL stuff based on RFC 3986.
 * <p>
 * Encoding writes every byte contained in the configured "printable" set
 * unchanged (except for the blank, which is written as <code>+</code>) and
 * every other byte as <code>%XX</code> with upper case hex digits. Decoding
 * reverses this: <code>+</code> becomes a blank, <code>%XX</code> becomes the
 * byte with that hex value and all other bytes are copied as is.
 * <p>
 * The per-instance state is a private copy of the printable character set that
 * is never modified after construction.
 *
 * @author Philip Helger
 */
@ThreadSafe
public class URLCodec implements IByteArrayCodec
{
  private static final byte ESCAPE_CHAR = '%';
  private static final byte SPACE = ' ';
  private static final byte PLUS = '+';

  /**
   * BitSet of RFC 3986 unreserved characters, plus the blank (which is emitted
   * as <code>+</code> by {@link #encode(byte[], int, int, OutputStream)}).
   */
  private static final BitSet PRINTABLE_CHARS_RFC3986 = new BitSet (256);

  static
  {
    // alpha characters
    for (int i = 'a'; i <= 'z'; i++)
      PRINTABLE_CHARS_RFC3986.set (i);
    for (int i = 'A'; i <= 'Z'; i++)
      PRINTABLE_CHARS_RFC3986.set (i);
    // numeric characters
    for (int i = '0'; i <= '9'; i++)
      PRINTABLE_CHARS_RFC3986.set (i);
    // special chars
    PRINTABLE_CHARS_RFC3986.set ('-');
    PRINTABLE_CHARS_RFC3986.set ('_');
    PRINTABLE_CHARS_RFC3986.set ('.');
    PRINTABLE_CHARS_RFC3986.set ('~');
    // blank to be replaced with +
    PRINTABLE_CHARS_RFC3986.set (SPACE);
    // Apache Http-client also adds "*" to printable chars
  }

  /**
   * @return A copy of the default bit set to be used. Never {@code null}.
   */
  @Nonnull
  @ReturnsMutableCopy
  public static BitSet getDefaultPrintableChars ()
  {
    return (BitSet) PRINTABLE_CHARS_RFC3986.clone ();
  }

  private final BitSet m_aPrintableChars;

  /**
   * Default constructor with the RFC 3986 printable characters.
   */
  public URLCodec ()
  {
    this (PRINTABLE_CHARS_RFC3986);
  }

  /**
   * Constructor with an arbitrary set of printable characters.
   *
   * @param aPrintableChars
   *        The printable character BitSet to use. May not be {@code null}. The
   *        passed object is cloned, so later changes to it do not affect this
   *        codec.
   */
  public URLCodec (@Nonnull final BitSet aPrintableChars)
  {
    m_aPrintableChars = (BitSet) aPrintableChars.clone ();
  }

  /**
   * @return A copy of the printable character bit set used by this instance.
   *         Never {@code null}.
   */
  @Nonnull
  @ReturnsMutableCopy
  public BitSet getPrintableChars ()
  {
    return (BitSet) m_aPrintableChars.clone ();
  }

  /**
   * Encodes byte into its URL representation, i.e. the escape char
   * <code>%</code> followed by two upper case hex digits.
   *
   * @param b
   *        byte to encode. Only the lowest 8 bits are used.
   * @param aOS
   *        the output stream to write to. May not be {@code null}.
   * @throws IOException
   *         In case writing to the OutputStream failed
   */
  public static final void writeEncodedURLByte (final int b, @Nonnull final OutputStream aOS) throws IOException
  {
    // Hex chars should be upper case as defined in RFC 3986 section 2.1
    final char cHigh = StringHelper.getHexCharUpperCase ((b >> 4) & 0xF);
    final char cLow = StringHelper.getHexCharUpperCase (b & 0xF);
    aOS.write (ESCAPE_CHAR);
    aOS.write (cHigh);
    aOS.write (cLow);
  }

  /**
   * URL encode the given range of the passed byte array and write the result
   * to the passed output stream. Nothing is written if the buffer is
   * {@code null} or the length is 0.
   *
   * @param aDecodedBuffer
   *        The bytes to be encoded. May be {@code null}.
   * @param nOfs
   *        Index of the first byte to encode.
   * @param nLen
   *        Number of bytes to encode.
   * @param aOS
   *        The output stream to write to. May not be {@code null}. It is not
   *        closed by this method.
   * @throws EncodeException
   *         If writing to the output stream failed. The original
   *         {@link IOException} is passed as the cause.
   */
  public void encode (@Nullable final byte [] aDecodedBuffer,
                      @Nonnegative final int nOfs,
                      @Nonnegative final int nLen,
                      @Nonnull @WillNotClose final OutputStream aOS)
  {
    if (aDecodedBuffer == null || nLen == 0)
      return;

    try
    {
      for (int i = 0; i < nLen; ++i)
      {
        // Work on the unsigned value so that it can be used as BitSet index
        final int b = aDecodedBuffer[nOfs + i] & 0xff;
        if (m_aPrintableChars.get (b))
        {
          // A printable blank is special: it is written as '+'
          if (b == SPACE)
            aOS.write (PLUS);
          else
            aOS.write (b);
        }
        else
        {
          writeEncodedURLByte (b, aOS);
        }
      }
    }
    catch (final IOException ex)
    {
      throw new EncodeException ("Failed to encode URL", ex);
    }
  }

  /**
   * URL decode the given range of the passed byte array and write the result
   * to the passed output stream. Nothing is written if the buffer is
   * {@code null} or the length is 0.
   *
   * @param aEncodedBuffer
   *        The bytes to be decoded. May be {@code null}.
   * @param nOfs
   *        Index of the first byte to decode.
   * @param nLen
   *        Number of bytes to decode.
   * @param aOS
   *        The output stream to write to. May not be {@code null}. It is not
   *        closed by this method.
   * @throws DecodeException
   *         If an escape char is not followed by two bytes inside the given
   *         range, if these two bytes are not valid hex digits, or if writing
   *         to the output stream failed.
   */
  public void decode (@Nullable final byte [] aEncodedBuffer,
                      @Nonnegative final int nOfs,
                      @Nonnegative final int nLen,
                      @Nonnull @WillNotClose final OutputStream aOS)
  {
    if (aEncodedBuffer == null || nLen == 0)
      return;

    try
    {
      for (int i = 0; i < nLen; i++)
      {
        // Unsigned byte value - consistent with encode
        final int b = aEncodedBuffer[nOfs + i] & 0xff;
        if (b == PLUS)
          aOS.write (SPACE);
        else
          if (b == ESCAPE_CHAR)
          {
            // Two more bytes must be available inside the range
            if (i >= nLen - 2)
              throw new DecodeException ("Invalid URL encoding. Premature end of input after escape char");

            // Mask before widening: a direct byte-to-char cast sign-extends
            // negative bytes (e.g. 0xC3 becomes 0xFFC3), which made the error
            // message below report values that are not byte values.
            final int nHigh = aEncodedBuffer[nOfs + i + 1] & 0xff;
            final int nLow = aEncodedBuffer[nOfs + i + 2] & 0xff;
            // Skip the two hex digits just consumed
            i += 2;

            final int nDecodedValue = StringHelper.getHexByte ((char) nHigh, (char) nLow);
            if (nDecodedValue < 0)
              throw new DecodeException ("Invalid URL encoding for " + nHigh + " and " + nLow);
            aOS.write (nDecodedValue);
          }
          else
          {
            aOS.write (b);
          }
      }
    }
    catch (final IOException ex)
    {
      throw new DecodeException ("Failed to decode URL", ex);
    }
  }
}