org.owasp.esapi.codecs.XMLEntityCodec Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of esapi Show documentation
The Enterprise Security API (ESAPI) project is an OWASP project to create simple strong security controls for every web platform. Security controls are not simple to build. You can read about the hundreds of pitfalls for unwary developers on the OWASP website. By providing developers with a set of strong controls, we aim to eliminate some of the complexity of creating secure web applications. This can result in significant cost savings across the SDLC.
There is a newer version: 2.5.5.0
Show newest version
/**
 * OWASP Enterprise Security API (ESAPI)
 * 
 * This file is part of the Open Web Application Security Project (OWASP)
 * Enterprise Security API (ESAPI) project. For details, please see
 * http://www.owasp.org/index.php/ESAPI.
 *
 * Copyright (c) 2009 - The OWASP Foundation
 * 
 * The ESAPI is published by OWASP under the BSD license. You should read and accept the
 * LICENSE before you use, modify, and/or redistribute this software.
 * 
 */
package org.owasp.esapi.codecs;

import java.util.Map;
import java.util.Set;

import org.owasp.esapi.util.CollectionsUtil;

/**
 * Implementation of the Codec interface for XML entity encoding.
 * This differs from HTML entity encoding in that only the following
 * named entities are predefined:
 * 
 * 	lt
 * 	gt
 * 	amp
 * 	apos
 * 	quot
 * 
 * However, the XML Specification 1.0 states in section 4.6 "Predefined
 * Entities" that these should still be declared for interoperability
 * purposes. As such, encoding in this class will not use them.
 *
 * It's also worth noting that unlike the HTMLEntityCodec, a trailing
 * semicolon is required and all valid codepoints are accepted.
 *
 * Note that it is a REALLY bad idea to use this for decoding as an XML
 * document can declare arbitrary entities that this Codec has no way
 * of knowing about. Decoding is included for completeness but its use
 * is not recommended. Use an XML parser instead!
 */
public class XMLEntityCodec extends AbstractCharacterCodec
{
	// The ASCII letters (both cases) followed by the ASCII digits.
	private static final String ALPHA_NUMERIC_STR = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
	// ALPHA_NUMERIC_STR plus the space and tab characters.
	private static final String UNENCODED_STR = ALPHA_NUMERIC_STR + " \t";
	// Set built from UNENCODED_STR (presumably unmodifiable, going by the
	// helper's name); encodeCharacter() returns members of it unchanged.
	private static final Set UNENCODED_SET = CollectionsUtil.strToUnmodifiableSet(UNENCODED_STR);
	// Lookup from an entity name (without the leading '&' and trailing ';')
	// to the character it stands for. Filled in by the static block below.
	private static final HashTrie entityToCharacterMap;

	static
	{	// populate entities: only the five names listed here are registered
		entityToCharacterMap = new HashTrie();
		entityToCharacterMap.put("lt", '<');
		entityToCharacterMap.put("gt", '>');
		entityToCharacterMap.put("amp", '&');
		entityToCharacterMap.put("apos", '\'');
		entityToCharacterMap.put("quot", '"');
	}

	/**
	 * {@inheritDoc}
	 *
	 * Produces the XML-safe form of a single character. A character that
	 * appears in immune, or that is a member of UNENCODED_SET (built from
	 * the ASCII letters, digits, space and tab), is returned as-is. Any
	 * other character is returned as a hexadecimal numeric character
	 * reference of the form &#xhhhh;. Named entities are never emitted.
	 *
	 * @param immune characters that should not be encoded as entities
	 * @param c the character to encode
	 * @return c as a string, or its hexadecimal numeric character reference
	 */
	public String encodeCharacter(char[] immune, Character c)
	{
		// immune characters are checked first, then the always-safe set
		boolean passThrough = containsCharacter(c, immune) || UNENCODED_SET.contains(c);
		if(passThrough)
			return c.toString();

		StringBuilder reference = new StringBuilder("&#x");
		reference.append(Integer.toHexString(c.charValue()));
		reference.append(";");
		return reference.toString();
	}

	/**
	 * {@inheritDoc}
	 *
	 * Attempts to decode one entity starting at the current position of
	 * input. The position is marked on entry; if nothing could be decoded
	 * the input is reset to that mark, so a failed attempt leaves the
	 * input where it started.
	 *
	 * Recognised forms:
	 *
	 * 	&#dddd;		decimal character reference
	 * 	&#xhhhh;	hexadecimal character reference
	 * 	&name;		named entity
	 *
	 * @param input the sequence to read from
	 * @return the decoded character, or null if the input does not start
	 * 	with a decodable entity
	 */
	public Character decodeCharacter(PushbackSequence input)
	{
		Character decoded = null;

		input.mark();
		try
		{
			Character lead = input.next();

			// every entity starts with '&'; anything else is not ours
			if(lead != null && lead.charValue() == '&')
			{
				Character marker = input.next();
				if(marker != null)
				{
					if(marker.charValue() == '#')
					{	// numeric reference: the '#' stays consumed
						decoded = getNumericEntity(input);
					}
					else if(Character.isLetter(marker.charValue()))
					{	// named entity: give back the first letter of the name
						input.pushback(marker);
						decoded = getNamedEntity(input);
					}
				}
			}
		}
		finally
		{
			// rewind on any failure, including an exception from a helper
			if(decoded == null)
				input.reset();
		}
		return decoded;
	}

	/**
	 * Decodes the remainder of a numeric character reference, choosing
	 * between hexadecimal and decimal parsing.
	 * @param input The input to read from. It is assumed that input
	 * 	is positioned at the character after the &#
	 * @return The character decoded, or null if there is no further
	 * 	input or the chosen parser fails.
	 */
	private static Character getNumericEntity(PushbackSequence input)
	{
		Character lookahead = input.peek();
		if(lookahead == null)
			return null;

		switch(lookahead.charValue())
		{
			case 'x':
			case 'X':
				input.next();	// consume the radix marker
				return parseHex(input);
			default:
				// leave the peeked character for the decimal parser
				return parseNumber(input);
		}
	}

	/**
	 * Maps an integer code point onto a Java char.
	 * @param i the code point to convert
	 * @return the boxed char for i, or null when i is not a valid code
	 * 	point or does not fit in a single Java char (code points above
	 * 	U+FFFF are currently not supported).
	 */
	private static Character int2char(int i)
	{
		boolean fitsInChar = Character.MIN_VALUE <= i && i <= Character.MAX_VALUE;
		if(Character.isValidCodePoint(i) && fitsInChar)
			return Character.valueOf((char)i);
		return null;
	}

	/**
	 * Converts the rest of a decimal numeric entity to a character.
	 * Only the ASCII digits 0-9 are accepted, matching the XML CharRef
	 * production and the ASCII-only behaviour of the hexadecimal parser.
	 * @param input The input to read from. It is assumed that input
	 * 	is positioned at the character after the &# and that
	 *	the next char is not a 'x' or 'X'.
	 * @return The character decoded or null on failure: a non-digit
	 * 	before the ';', a missing ';', no digits at all, or a value
	 * 	that cannot be represented as a Java char.
	 */
	private static Character parseNumber(PushbackSequence input)
	{
		StringBuilder sb = new StringBuilder();
		Character c;
		while((c=input.next())!=null)
		{
			// end of entity?
			if(c==';')
				break;

			// Accept ASCII digits only. Character.isDigit() also admits
			// other Unicode decimal digits, which Integer.parseInt() would
			// then convert, letting references no XML parser would
			// recognise decode successfully.
			char ch = c.charValue();
			if(ch < '0' || ch > '9')
				return null;
			sb.append(ch);
		}
		if(c==null)
			return null;	// not ';' terminated
		if(sb.length()<=0)	// no digits
			return null;
		try
		{
			return int2char(Integer.parseInt(sb.toString()));
		}
		catch(NumberFormatException e)
		{
			// digits were fine but the value overflows an int
			return null;
		}
	}

	/**
	 * Converts the rest of a hexadecimal numeric entity to a character.
	 * Hex digits are accepted in either case (0-9, a-f, A-F).
	 * @param input The input to read from. It is assumed that input
	 * 	is positioned at the character after the &#[xX]
	 * @return The character decoded or null on failure: a non-hex
	 * 	character before the ';', a missing ';', no digits at all, or
	 * 	a value that cannot be represented as a Java char.
	 */
	private static Character parseHex(PushbackSequence input)
	{
		Character c;
		StringBuilder sb = new StringBuilder();
		input_loop: while((c=input.next())!=null)
		{
			switch(c.charValue())
			{
				case 'a':
				case 'b':
				case 'c':
				case 'd':
				case 'e':
				case 'f':
				case 'A':
				case 'B':
				case 'C':
				case 'D':
				case 'E':
				case 'F':	// was missing: upper-case F (e.g. &#xFF;) was rejected
				case '0':
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9':
					sb.append(c);
					break;
				case ';':
					// end of entity
					break input_loop;
				default:
					// anything else makes this not a hex reference
					return null;
			}
		}
		if(c==null)
			return null;	// not ';' terminated
		if(sb.length()<=0)	// no digits
			return null;
		try
		{
			return int2char(Integer.parseInt(sb.toString(),16));
		}
		catch(NumberFormatException e)
		{
			// digits were fine but the value overflows an int
			return null;
		}
	}

	/**
	 * 
	 * Converts the rest of a named entity to a character, or returns
	 * null if no decoding is possible.
	 * @param input The input to read from. It is assumed that input
	 * 	is positioned at the character after the &.
	 * @return The character decoded or null on failure.
	 */
	private Character getNamedEntity(PushbackSequence input)
	{
		StringBuilder possible = new StringBuilder();
		Map.Entry entry;
		int len;

		// kludge around PushbackString....
		len = Math.min(input.remainder().length(), entityToCharacterMap.getMaxKeyLength()+1);
		for(int i=0;i