All downloads are free. Search and download functionality uses the official Maven repository.

org.owasp.esapi.codecs.PercentCodec Maven / Gradle / Ivy

Go to download

The Enterprise Security API (ESAPI) project is an OWASP project to create simple strong security controls for every web platform. Security controls are not simple to build. You can read about the hundreds of pitfalls for unwary developers on the OWASP website. By providing developers with a set of strong controls, we aim to eliminate some of the complexity of creating secure web applications. This can result in significant cost savings across the SDLC.

There is a newer version: 2.5.5.0
Show newest version
/**
 * OWASP Enterprise Security API (ESAPI)
 * 
 * This file is part of the Open Web Application Security Project (OWASP)
 * Enterprise Security API (ESAPI) project. For details, please see
 * http://www.owasp.org/index.php/ESAPI.
 *
 * Copyright (c) 2007 - The OWASP Foundation
 * 
 * The ESAPI is published by OWASP under the BSD license. You should read and accept the
 * LICENSE before you use, modify, and/or redistribute this software.
 * 
 * @author Jeff Williams Aspect Security
 * @created 2007
 */
package org.owasp.esapi.codecs;

import java.io.UnsupportedEncodingException;
import java.util.Set;

import org.owasp.esapi.util.CollectionsUtil;

/**
 * Implementation of the Codec interface for percent encoding (aka URL encoding).
 * <p>
 * Encoding emits one <code>%HH</code> triplet (uppercase hex) for every UTF-8
 * byte of the character being encoded. Decoding consumes a single
 * <code>%hh</code> triplet and returns the character whose value is that one
 * byte; it does not reassemble multi-byte UTF-8 sequences.
 * 
 * @author Jeff Williams (jeff.williams .at. aspectsecurity.com) Aspect Security
 * @since June 1, 2007
 * @see org.owasp.esapi.Encoder
 */
public class PercentCodec extends AbstractCharacterCodec
{
	private static final String ALPHA_NUMERIC_STR = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
	@SuppressWarnings("unused")
	private static final String RFC3986_RESERVED_STR = ":/?#[]@!$&'()*+,;=";
	private static final String RFC3986_NON_ALPHANUMERIC_UNRESERVED_STR = "-._~";
		// rfc3986 2.3: For consistency, percent-encoded octets
		// in the ranges of ALPHA (%41-%5A and %61-%7A), DIGIT
		// (%30-%39), hyphen (%2D), period (%2E), underscore
		// (%5F), or tilde (%7E) should not be created by URI
		// producers
	// While this flag is true the characters "-._~" are NOT added to the
	// pass-through set below, i.e. they are percent-encoded like any other
	// non-alphanumeric character.
	private static final boolean ENCODED_NON_ALPHA_NUMERIC_UNRESERVED = true;
	private static final String UNENCODED_STR = ALPHA_NUMERIC_STR +
		(ENCODED_NON_ALPHA_NUMERIC_UNRESERVED ? "" : RFC3986_NON_ALPHANUMERIC_UNRESERVED_STR);
	/** Characters that {@link #encodeCharacter(char[], Character)} returns unchanged. */
	private static final Set<Character> UNENCODED_SET = CollectionsUtil.strToUnmodifiableSet(UNENCODED_STR);
	/** Uppercase hex digits indexed by nibble value; avoids locale-sensitive case conversion. */
	private static final char[] UPPER_HEX_DIGITS = "0123456789ABCDEF".toCharArray();

	/**
	 * Convenience method to encode a string into UTF-8. This
	 * wraps the {@link UnsupportedEncodingException} that
	 * {@link String#getBytes(String)} throws in a
	 * {@link IllegalStateException} as UTF-8 support is required
	 * by the Java spec and should never throw this exception.
	 * @param str the string to encode
	 * @return str encoded in UTF-8 as bytes.
	 * @throws IllegalStateException wrapped {@link
	 *	UnsupportedEncodingException} if
	 *	{@link String#getBytes(String)} throws it.
	 */
	private static byte[] toUtf8Bytes(String str)
	{
		try
		{
			return str.getBytes("UTF-8");
		}
		catch(UnsupportedEncodingException e)
		{
			throw new IllegalStateException("The Java spec requires UTF-8 support.", e);
		}
	}

	/**
	 * Append the two upper case hex characters for a byte.
	 * @param sb The string builder to append to.
	 * @param b The byte to hexify; must lie in the signed byte range
	 *          ({@link Byte#MIN_VALUE} to {@link Byte#MAX_VALUE}).
	 * @return sb with the hex characters appended.
	 * @throws IllegalArgumentException if b is outside the signed byte range.
	 */
	// rfc3986 2.1: For consistency, URI producers 
	// should use uppercase hexadecimal digits for all percent-
	// encodings.
	private static StringBuilder appendTwoUpperHex(StringBuilder sb, int b)
	{
		if(b < Byte.MIN_VALUE || b > Byte.MAX_VALUE)
			throw new IllegalArgumentException("b is not a byte (was " + b + ')');
		// view the (possibly negative) byte as an unsigned value 0x00-0xFF
		b &= 0xFF;
		// high nibble first, then low nibble; always exactly two digits
		return sb.append(UPPER_HEX_DIGITS[b >>> 4]).append(UPPER_HEX_DIGITS[b & 0x0F]);
	}

	/**
	 * Encode a character for URLs.
	 * <p>
	 * Immune characters and ASCII alphanumerics are returned unchanged; any
	 * other character is converted to UTF-8 and each resulting byte is
	 * written as <code>%HH</code> with uppercase hex digits.
	 * @param immune Additional characters not to encode. Note this could
     *               break URL encoding as referenced in RFC 3986. You should
     *               especially be wary of including '%' in this list of immune
     *               characters since it is used as the "escape" character for
     *               the hex encoding and including it may result in subsequent
     *               and/or dangerous results when decoding. May be null, in
     *               which case no additional characters are immune.
	 * @param c character to encode
	 * @return the encoded string representing c
	 */
	public String encodeCharacter( char[] immune, Character c )
	{
		String cStr = String.valueOf(c.charValue());

        // check for user specified immune characters
        if ( immune != null && containsCharacter( c.charValue(), immune ) )
            return cStr;

        // check for standard characters (e.g., alphanumeric, etc.)
		if(UNENCODED_SET.contains(c))
			return cStr;

		// everything else: one %HH triplet per UTF-8 byte
		byte[] bytes = toUtf8Bytes(cStr);
		StringBuilder sb = new StringBuilder(bytes.length * 3);
		for(byte b : bytes)
			appendTwoUpperHex(sb.append('%'), b);
		return sb.toString();
	}

	/**
	 * {@inheritDoc}
	 * 
	 * Accepted format (hex digits may be upper or lower case):
	 *   %hh
	 * <p>
	 * Exactly two hex digits must follow the percent sign. When the input
	 * does not start with such a triplet, the input is reset to the position
	 * it had on entry and null is returned.
	 *   
	 * @param input
	 * 			encoded character using percent characters (such as URL encoding)
	 * @return the character whose value is the decoded byte, or null if the
	 *         input does not begin with a valid <code>%hh</code> triplet
	 */
	public Character decodeCharacter( PushbackSequence<Character> input ) {
		input.mark();
		Character first = input.next();

		// end of input, or not an encoded character: rewind and report no match
		if ( first == null || first.charValue() != '%' ) {
			input.reset();
			return null;
		}

		// Require exactly 2 hex digits following the '%'
		StringBuilder hex = new StringBuilder(2);
		for ( int i=0; i<2; i++ ) {
			Character digit = input.nextHex();
			if ( digit == null ) {
				// no point reading further; the triplet is already invalid
				input.reset();
				return null;
			}
			hex.append( digit );
		}

		try {
			// parse the hex digits and create a character
			int value = Integer.parseInt(hex.toString(), 16);
			if (Character.isValidCodePoint(value)) {
				return (char) value;
			}
		} catch( NumberFormatException ignored ) {
			// Defensive: only reachable if nextHex() handed back something
			// that is not a hex digit; treat it as "not an encoded character".
		}
		input.reset();
		return null;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy