All downloads are FREE. Search and download functionality uses the official Maven repository.

org.owasp.esapi.codecs.XMLEntityCodec Maven / Gradle / Ivy

Go to download

The Enterprise Security API (ESAPI) project is an OWASP project to create simple strong security controls for every web platform. Security controls are not simple to build. You can read about the hundreds of pitfalls for unwary developers on the OWASP website. By providing developers with a set of strong controls, we aim to eliminate some of the complexity of creating secure web applications. This can result in significant cost savings across the SDLC.

There is a newer version: 2.5.5.0
Show newest version
/**
 * OWASP Enterprise Security API (ESAPI)
 * 
 * This file is part of the Open Web Application Security Project (OWASP)
 * Enterprise Security API (ESAPI) project. For details, please see
 * http://www.owasp.org/index.php/ESAPI.
 *
 * Copyright (c) 2009 - The OWASP Foundation
 * 
 * The ESAPI is published by OWASP under the BSD license. You should read and accept the
 * LICENSE before you use, modify, and/or redistribute this software.
 * 
 */
package org.owasp.esapi.codecs;

import java.util.Map;
import java.util.Set;

import org.owasp.esapi.util.CollectionsUtil;

/**
 * Implementation of the Codec interface for XML entity encoding.
 * This differs from HTML entity encoding in that only the following
 * named entities are predefined:
 * 
 * <ul>
 * <li>lt</li>
 * <li>gt</li>
 * <li>amp</li>
 * <li>apos</li>
 * <li>quot</li>
 * </ul>
 *
* However, the XML Specification 1.0 states in section 4.6 "Predefined * Entities" that these should still be declared for interoperability * purposes. As such, encoding in this class will not use them. * * It's also worth noting that unlike the HTMLEntityCodec, a trailing * semicolon is required and all valid codepoints are accepted. * * Note that it is a REALLY bad idea to use this for decoding as an XML * document can declare arbitrary entities that this Codec has no way * of knowing about. Decoding is included for completeness but it's use * is not recommended. Use a XML parser instead! */ public class XMLEntityCodec extends AbstractCharacterCodec { private static final String ALPHA_NUMERIC_STR = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; private static final String UNENCODED_STR = ALPHA_NUMERIC_STR + " \t"; private static final Set UNENCODED_SET = CollectionsUtil.strToUnmodifiableSet(UNENCODED_STR); private static final HashTrie entityToCharacterMap; static { // populate entitites entityToCharacterMap = new HashTrie(); entityToCharacterMap.put("lt", '<'); entityToCharacterMap.put("gt", '>'); entityToCharacterMap.put("amp", '&'); entityToCharacterMap.put("apos", '\''); entityToCharacterMap.put("quot", '"'); } /** * {@inheritDoc} * * Encodes a Character using XML entities as necessary. * * @param immune characters that should not be encoded as entities */ public String encodeCharacter(char[] immune, Character c) { // check for immune characters if(containsCharacter(c, immune)) return c.toString(); // check for unencoded characters if(UNENCODED_SET.contains(c)) return c.toString(); return "&#x" + Integer.toHexString(c.charValue()) + ";"; } /** * {@inheritDoc} * * Returns the decoded version of the character starting at index, or * null if no decoding is possible. * * Legal formats: *
     * <ul>
     * <li>&amp;#dddd;</li>
     * <li>&amp;#xhhhh;</li>
     * <li>&amp;name;</li>
     * </ul>
*/
    public Character decodeCharacter(PushbackSequence input) {
        Character ret = null;
        Character first;
        Character second;

        // remember the start position so a failed decode can be undone
        input.mark();
        try {
            first = input.next();
            if (first == null) {
                return null;
            }

            // if this is not an encoded character, return null
            if (first != '&') {
                return null;
            }

            // test for numeric encodings
            second = input.next();
            if (second == null) {
                return null;
            }
            if (second == '#') {
                // handle numbers
                ret = getNumericEntity(input);
            } else if (Character.isLetter(second.charValue())) {
                // handle entities
                input.pushback(second);
                ret = getNamedEntity(input);
            }
        } finally {
            // every failure path leaves ret null: rewind to the mark
            if (ret == null) {
                input.reset();
            }
        }
        return ret;
    }

    /**
     * Converts the rest of a numeric entity to a character.
     * @param input The input to read from. It is assumed that input
     *      is positioned at the character after the {@code &#}
     * @return The character decoded or null on failure.
     */
    private static Character getNumericEntity(PushbackSequence input) {
        Character first = input.peek();

        if (first == null) {
            return null;
        }

        if (first == 'x' || first == 'X') {
            input.next(); // consume the hex marker
            return parseHex(input);
        }
        return parseNumber(input);
    }

    /**
     * Convert an integer code point to a Character.
     * @param i the integer
     * @return i as a Character or null if i is an invalid code point
     *      or outside of the Java char range.
     */
    private static Character int2char(int i) {
        if (!Character.isValidCodePoint(i)) {
            return null;
        }
        // values beyond Character.MAX_VALUE do not fit in a single char
        if (!(Character.MIN_VALUE <= i && i <= Character.MAX_VALUE)) {
            return null;
        }
        return (char) i;
    }

    /**
     * Converts the rest of a decimal numeric entity to a character.
     * @param input The input to read from. It is assumed that input
     *      is positioned at the character after the {@code &#} and that
     *      the next char is not a 'x' or 'X'.
     * @return The character decoded or null on failure.
     */
    private static Character parseNumber(PushbackSequence input) {
        StringBuilder sb = new StringBuilder();
        Character c;

        while ((c = input.next()) != null) {
            // end of entity?
            if (c == ';') {
                break;
            }
            // check for digit
            if (!Character.isDigit(c.charValue())) {
                return null;
            }
            sb.append(c);
        }
        if (c == null) {
            return null; // not ';' terminated
        }
        if (sb.length() <= 0) { // no digits
            return null;
        }
        try {
            return int2char(Integer.parseInt(sb.toString()));
        } catch (NumberFormatException e) {
            // e.g. more digits than fit in an int
            return null;
        }
    }

    /**
     * Converts the rest of a hexadecimal numeric entity to a character.
     * Both lower-case and upper-case hex digits (a-f, A-F) are accepted.
     * @param input The input to read from. It is assumed that input
     *      is positioned at the character after the {@code &#[xX]}
     * @return The character decoded or null on failure.
     */
    private static Character parseHex(PushbackSequence input) {
        Character c;
        StringBuilder sb = new StringBuilder();

        input_loop: while ((c = input.next()) != null) {
            switch (c.charValue()) {
                case 'a':
                case 'b':
                case 'c':
                case 'd':
                case 'e':
                case 'f':
                case 'A':
                case 'B':
                case 'C':
                case 'D':
                case 'E':
                case 'F': // previously missing, which rejected references such as &#xFF;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    sb.append(c);
                    break;
                case ';':
                    break input_loop;
                default:
                    return null;
            }
        }
        if (c == null) {
            return null; // not ';' terminated
        }
        if (sb.length() <= 0) { // no digits
            return null;
        }
        try {
            return int2char(Integer.parseInt(sb.toString(), 16));
        } catch (NumberFormatException e) {
            // e.g. more hex digits than fit in an int
            return null;
        }
    }

    /**
     * Converts the rest of a named entity to a character.
     * null if no decoding is possible.
     * @param input The input to read from. It is assumed that input
     *      is positioned at the character after the {@code &}.
     * @return The character decoded or null on failure.
     */
    private Character getNamedEntity(PushbackSequence input) {
        StringBuilder possible = new StringBuilder();
        Map.Entry entry;
        int len;

        // kludge around PushbackString....
        len = Math.min(input.remainder().length(), entityToCharacterMap.getMaxKeyLength()+1);
        // NOTE(review): the remainder of this method is cut off in this copy of the file.
        for(int i=0;i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy