org.owasp.esapi.codecs.AbstractCodec Maven / Gradle / Ivy

/**
 * OWASP Enterprise Security API (ESAPI)
 * 
 * This file is part of the Open Web Application Security Project (OWASP)
 * Enterprise Security API (ESAPI) project. For details, please see
 * http://www.owasp.org/index.php/ESAPI.
 *
 * Copyright (c) 2017 - The OWASP Foundation
 * 
 * The ESAPI is published by OWASP under the BSD license. You should read and accept the
 * LICENSE before you use, modify, and/or redistribute this software.
 * 
 * @author Matt Seil (mseil .at. owasp.org)
 * @created 2017
 */
package org.owasp.esapi.codecs;


/**
 * The Codec interface defines a set of methods for encoding and decoding application level encoding schemes,
 * such as HTML entity encoding and percent encoding (aka URL encoding). Codecs are used in output encoding
 * and canonicalization.  The design of these codecs allows for character-by-character decoding, which is
 * necessary to detect double-encoding and the use of multiple encoding schemes, both of which are techniques
 * used by attackers to bypass validation and bury encoded attacks in data.
 * 
 * @author Jeff Williams (jeff.williams .at. aspectsecurity.com) Aspect Security
 * @param 
 * @since June 1, 2007
 * @see org.owasp.esapi.Encoder
 */
public abstract class AbstractCodec implements Codec {

	/**
	 * Initialize an array to mark which characters are to be encoded. Store the hex
	 * string for that character to save time later. If the character shouldn't be
	 * encoded, then store null.
	 */
	private final String[] hex = new String[256];

	/**
	 * Default constructor
	 */
	public AbstractCodec() {
		for ( char c = 0; c < 0xFF; c++ ) {
			if ( c >= 0x30 && c <= 0x39 || c >= 0x41 && c <= 0x5A || c >= 0x61 && c <= 0x7A ) {
				hex[c] = null;
			} else {
				hex[c] = toHex(c).intern();
			}
		}
	}

	/**
	 * WARNING!!  {@code Character} based Codecs will silently transform code points that are not 
	 * legal UTF code points into garbage data as they will cast them to {@code char}s.  
	 * 


	 * If you are implementing an {@code Integer} based codec, these will be silently discarded
	 * based on the return from {@code Character.isValidCodePoint( int )}.  This is the preferred
	 * behavior moving forward.  
	 * 
	 * 
	 * {@inheritDoc}
	 */
	@Override
	public String encode(char[] immune, String input) {
		StringBuilder sb = new StringBuilder();
		for(int offset  = 0; offset < input.length(); ) {
			final int point = input.codePointAt(offset);
			if (Character.isBmpCodePoint(point)) {
				//We can then safely cast this to char and maintain legacy behavior.
				sb.append(encodeCharacter(immune, new Character((char) point)));
			} else {
				sb.append(encodeCharacter(immune, point));	
			}
			offset += Character.charCount(point);
		}
		return sb.toString();
	}

	/**
	 * WARNING!!!!  Passing a standard char to this method will resolve to the 
	 * @see #encodeCharacter( char[], int )
	 * method instead of this one!!!  YOU HAVE BEEN WARNED!!!!
	 * 
	 * {@inheritDoc}
	 */
	@Override
	public String encodeCharacter( char[] immune, Character c ) {
		return ""+c;
	}
	
	public String encodeCharacter(char[] immune, char c) {
		throw new IllegalArgumentException("You tried to call encodeCharacter with a char.  Nope.  Use Character instead!");
	}
	
	/* (non-Javadoc)
	 * @see org.owasp.esapi.codecs.Codec#encodeCharacter(char[], int)
	 */
	@Override
	public String encodeCharacter( char[] immune, int codePoint ) {
		String rval = "";
		if(Character.isValidCodePoint(codePoint)){
			rval = new StringBuilder().appendCodePoint(codePoint).toString();
		}
		return rval;
	}



	/* (non-Javadoc)
	 * @see org.owasp.esapi.codecs.Codec#decodeCharacter(org.owasp.esapi.codecs.PushbackString)
	 */
	@Override
	public T decodeCharacter( PushbackSequence input ) {
		return input.next();
	}

	/**
	 * {@inheritDoc}
	 */
	public String getHexForNonAlphanumeric(char c) {
		if(c<0xFF)
			return hex[c];
		return toHex(c);
	}
	
	/**
	 * {@inheritDoc}
	 */
	public String getHexForNonAlphanumeric(int c) {
		if (c<0xFF) {
			return hex[c];
		} else {
			return toHex(c);
		}
	}

	public String toOctal(char c) {
		return Integer.toOctalString(c);
	}

	public String toHex(char c) {
		return Integer.toHexString(c);
	}
	
	public String toHex(int c) {
		return Integer.toHexString(c);
	}

	/**
	 * {@inheritDoc}
	 */
	public boolean containsCharacter( char c, char[] array ) {
		for (char ch : array) {
			if (c == ch) return true;
		}
		return false;
	}

}