org.owasp.esapi.codecs.XMLEntityCodec Maven / Gradle / Ivy
/**
* OWASP Enterprise Security API (ESAPI)
*
* This file is part of the Open Web Application Security Project (OWASP)
* Enterprise Security API (ESAPI) project. For details, please see
* http://www.owasp.org/index.php/ESAPI.
*
* Copyright (c) 2009 - The OWASP Foundation
*
* The ESAPI is published by OWASP under the BSD license. You should read and accept the
* LICENSE before you use, modify, and/or redistribute this software.
*
*/
package org.owasp.esapi.codecs;
import java.util.Map;
import java.util.Set;
import org.owasp.esapi.util.CollectionsUtil;
/**
* Implementation of the Codec interface for XML entity encoding.
* This differs from HTML entity encoding in that only the following
* named entities are predefined:
*
* - lt
* - gt
* - amp
* - apos
* - quot
*
* However, the XML Specification 1.0 states in section 4.6 "Predefined
* Entities" that these should still be declared for interoperability
* purposes. As such, encoding in this class will not use them.
*
* It's also worth noting that unlike the HTMLEntityCodec, a trailing
* semicolon is required and all valid codepoints are accepted.
*
* Note that it is a REALLY bad idea to use this for decoding as an XML
* document can declare arbitrary entities that this Codec has no way
* of knowing about. Decoding is included for completeness but its use
* is not recommended. Use an XML parser instead!
*/
public class XMLEntityCodec extends AbstractCharacterCodec
{
/** ASCII letters and digits. */
private static final String ALPHA_NUMERIC_STR = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
/** Characters the encoder emits verbatim: the alphanumerics plus space and tab. */
private static final String UNENCODED_STR = ALPHA_NUMERIC_STR + " \t";
/** Set built from {@link #UNENCODED_STR}, used for membership checks while encoding. */
private static final Set UNENCODED_SET = CollectionsUtil.strToUnmodifiableSet(UNENCODED_STR);
/** Maps the five predefined XML entity names to the characters they stand for. */
private static final HashTrie entityToCharacterMap;
static
{
    // register the predefined entities, then publish the finished trie
    HashTrie predefined = new HashTrie();
    predefined.put("lt", '<');
    predefined.put("gt", '>');
    predefined.put("amp", '&');
    predefined.put("apos", '\'');
    predefined.put("quot", '"');
    entityToCharacterMap = predefined;
}
/**
 * {@inheritDoc}
 *
 * Encodes a Character using XML entities as necessary. Characters that
 * are immune or in the unencoded set (ASCII letters, digits, space and
 * tab) are returned unchanged; every other character is returned as a
 * hexadecimal character reference of the form <code>&amp;#xhhhh;</code>.
 *
 * @param immune characters that should not be encoded as entities
 * @param c the character to encode
 * @return the character itself, or its hexadecimal character reference
 */
public String encodeCharacter(char[] immune, Character c)
{
    // immune characters and plain alphanumerics/whitespace pass through untouched
    if(containsCharacter(c, immune) || UNENCODED_SET.contains(c))
        return c.toString();
    // The "&#x" prefix is what makes this a character reference; without it
    // the output (e.g. "3c;") is plain text that decodeCharacter cannot read back.
    return "&#x" + Integer.toHexString(c.charValue()) + ";";
}
/**
 * {@inheritDoc}
 *
 * Attempts to decode a single entity at the current position of the
 * input. The input is marked first and reset to that mark whenever
 * nothing could be decoded, so a failed attempt consumes nothing.
 *
 * Legal formats:
 *
 * - &#dddd;
 * - &#xhhhh;
 * - &name;
 *
 * @param input the sequence to read from
 * @return the decoded character, or null if no decoding is possible
 */
public Character decodeCharacter(PushbackSequence input)
{
    Character decoded = null;
    input.mark();
    try
    {
        // an entity always starts with '&'
        Character lead = input.next();
        if(lead == null || lead.charValue() != '&')
            return null;

        // the character after '&' selects numeric vs. named handling
        Character marker = input.next();
        if(marker == null)
            return null;

        if(marker.charValue() == '#')
        {
            decoded = getNumericEntity(input);
        }
        else if(Character.isLetter(marker.charValue()))
        {
            // the letter is part of the entity name, so hand it back first
            input.pushback(marker);
            decoded = getNamedEntity(input);
        }
        return decoded;
    }
    finally
    {
        // rewind to the mark on every path that produced no character
        if(decoded == null)
            input.reset();
    }
}
/**
 * Decodes the remainder of a numeric entity, choosing between the
 * hexadecimal and decimal parsers based on the next character.
 * @param input The input to read from. It is assumed that input
 *      is positioned at the character after the &#
 * @return The character decoded or null on failure.
 */
private static Character getNumericEntity(PushbackSequence input)
{
    Character radixMarker = input.peek();
    if(radixMarker == null)
        return null;
    switch(radixMarker.charValue())
    {
        case 'x':
        case 'X':
            input.next(); // consume the radix marker before parsing the digits
            return parseHex(input);
        default:
            return parseNumber(input);
    }
}
/**
 * Converts an integer code point to a Character.
 * @param i the integer
 * @return i as a Character, or null if i is an invalid code point
 *      or lies outside of the Java char range.
 */
private static Character int2char(int i)
{
    final boolean validCodePoint = Character.isValidCodePoint(i);
    // code points above Character.MAX_VALUE cannot be held in a single char
    final boolean fitsInChar = i >= Character.MIN_VALUE && i <= Character.MAX_VALUE;
    if(validCodePoint && fitsInChar)
        return Character.valueOf((char)i);
    return null;
}
/**
 * Decodes the remainder of a decimal numeric entity.
 * @param input The input to read from. It is assumed that input
 *      is positioned at the character after the &# and that
 *      the next char is not a 'x' or 'X'.
 * @return The character decoded or null on failure.
 */
private static Character parseNumber(PushbackSequence input)
{
    StringBuilder digits = new StringBuilder();
    boolean terminated = false;
    for(Character ch = input.next(); ch != null; ch = input.next())
    {
        if(ch.charValue() == ';')
        {
            // end of entity
            terminated = true;
            break;
        }
        if(!Character.isDigit(ch.charValue()))
            return null;
        digits.append(ch);
    }
    // input ran out before ';' was seen, or there were no digits at all
    if(!terminated || digits.length() == 0)
        return null;
    try
    {
        return int2char(Integer.parseInt(digits.toString()));
    }
    catch(NumberFormatException e)
    {
        // digits did not form a value that fits in an int
        return null;
    }
}
/**
 * Converts the rest of a hexadecimal numeric entity to a character.
 * Both lower case and upper case hex digits (a-f, A-F) are accepted.
 * @param input The input to read from. It is assumed that input
 *      is positioned at the character after the &#[xX]
 * @return The character decoded or null on failure.
 */
private static Character parseHex(PushbackSequence input)
{
    Character c;
    StringBuilder sb = new StringBuilder();
    input_loop: while((c=input.next())!=null)
    {
        switch(c.charValue())
        {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F': // previously missing, which made e.g. "&#xFF;" undecodable
                sb.append(c);
                break;
            case ';':
                // end of entity
                break input_loop;
            default:
                // anything else means this is not a hex entity
                return null;
        }
    }
    if(c==null)
        return null;    // not ';' terminated
    if(sb.length()<=0)  // no digits
        return null;
    try
    {
        return int2char(Integer.parseInt(sb.toString(),16));
    }
    catch(NumberFormatException e)
    {
        // digits did not form a value that fits in an int
        return null;
    }
}
/**
*
* Converts the rest of a named entity to a character.
* null if no decoding is possible.
* @param input The input to read from. It is assumed that input
* is positioned at the character after the &.
* @return The character decoded or null on failutre.
*/
private Character getNamedEntity(PushbackSequence input)
{
StringBuilder possible = new StringBuilder();
Map.Entry entry;
int len;
// kludge around PushbackString....
len = Math.min(input.remainder().length(), entityToCharacterMap.getMaxKeyLength()+1);
for(int i=0;i