// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it as often as you want.
/**
* OWASP Enterprise Security API (ESAPI)
*
* This file is part of the Open Web Application Security Project (OWASP)
* Enterprise Security API (ESAPI) project. For details, please see
* http://www.owasp.org/index.php/ESAPI.
*
* Copyright (c) 2017 - The OWASP Foundation
*
* The ESAPI is published by OWASP under the BSD license. You should read and accept the
* LICENSE before you use, modify, and/or redistribute this software.
*
* @author Matt Seil (mseil .at. owasp.org)
* @created 2017
*
* @author Jeff Williams (jeff.williams .at. aspectsecurity.com) Aspect Security
* @created 2007
*/
package org.owasp.esapi.codecs;
import java.util.HashMap;
import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
/**
* Implementation of the Codec interface for HTML entity encoding.
*
* @author Jeff Williams (jeff.williams .at. aspectsecurity.com) Aspect Security
* @since June 1, 2007
*
* @author Matt Seil (mseil .at. owasp.org)
*
* @see org.owasp.esapi.Encoder
*/
public class HTMLEntityCodec extends AbstractIntegerCodec
{
/** Unicode replacement character (U+FFFD); substituted for illegal control characters when encoding. */
private static final char REPLACEMENT_CHAR = '\ufffd';
/** Hex digits of {@link #REPLACEMENT_CHAR}, used when the replacement is emitted as a numeric entity. */
private static final String REPLACEMENT_HEX = "fffd";
/** {@link #REPLACEMENT_CHAR} as a one-character string. */
private static final String REPLACEMENT_STR = "" + REPLACEMENT_CHAR;
/**
 * Maps a code point to its HTML entity name, without the surrounding
 * {@code &} and {@code ;}. Declared with type arguments so that lookups
 * yield a {@code String} and entry iteration is typed.
 */
private static final Map<Integer, String> characterToEntityMap = mkCharacterToEntityMap();
/**
 * Reverse lookup from entity name to code point. It is built from
 * {@link #characterToEntityMap}, so it must stay declared after that field:
 * static initializers run in textual order.
 */
// NOTE(review): left as a raw type because Trie's declaration is not visible here;
// add its type argument if Trie turns out to be generic.
private static final Trie entityToCharacterTrie = mkEntityToCharacterTrie();
/**
 * Creates an HTML entity codec. The constructor performs no work of its own;
 * the entity lookup tables are static and are built when the class is initialized.
 */
public HTMLEntityCodec() {
}
/**
 * Encodes every code point of {@code input} by delegating to
 * {@link #encodeCharacter(char[], int)}, leaving the characters listed in
 * {@code immune} to that method's immunity rules.
 *
 * The input is walked by code point rather than by {@code char}, so a
 * surrogate pair is handed to {@code encodeCharacter} as one value.
 *
 * WARNING: any code point rejected by {@code Character.isValidCodePoint( int )}
 * is silently dropped from the output.
 *
 * {@inheritDoc}
 */
@Override
public String encode(char[] immune, String input) {
    final StringBuilder encoded = new StringBuilder();
    int index = 0;
    while (index < input.length()) {
        final int codePoint = input.codePointAt(index);
        // step over one or two chars, depending on whether this is a surrogate pair
        index += Character.charCount(codePoint);
        if (!Character.isValidCodePoint(codePoint)) {
            continue;
        }
        encoded.append(encodeCharacter(immune, codePoint));
    }
    return encoded.toString();
}
/**
 * {@inheritDoc}
 *
 * Encodes a single code point for safe use in HTML. The result is one of:
 * <ul>
 * <li>the code point itself, when it is immune or when
 *     {@code getHexForNonAlphanumeric} returns {@code null} for it;</li>
 * <li>a named entity such as {@code &amp;lt;}, when the code point has an
 *     entry in the entity map;</li>
 * <li>a hexadecimal numeric entity of the form {@code &amp;#xHHHH;} otherwise.</li>
 * </ul>
 * Control characters other than tab, line feed and carriage return, and the
 * range 0x7f-0x9f, are replaced by U+FFFD before being encoded.
 *
 * @param immune
 *            characters that are returned unencoded; only code points in the
 *            Basic Multilingual Plane can be immune
 * @param codePoint
 *            the code point to encode
 * @return the encoded (or unchanged) representation of {@code codePoint}
 */
@Override
public String encodeCharacter( char[] immune, int codePoint ) {
    final boolean valid = Character.isValidCodePoint(codePoint);

    // check for immune characters
    // Immunity is limited to the BMP. The range test has to come before the
    // (char) cast: the cast keeps only the low 16 bits, so without the test a
    // supplementary code point such as U+1003C would be matched as '<'.
    if ( valid
            && codePoint <= Character.MAX_VALUE
            && containsCharacter( (char) codePoint, immune ) ) {
        return new StringBuilder().appendCodePoint(codePoint).toString();
    }

    // check for alphanumeric characters: a null hex value is taken to mean
    // that no encoding is needed
    String hex = super.getHexForNonAlphanumeric(codePoint);
    if ( hex == null && valid ) {
        return new StringBuilder().appendCodePoint(codePoint).toString();
    }

    // check for illegal characters
    if ( ( codePoint <= 0x1f
            && codePoint != '\t'
            && codePoint != '\n'
            && codePoint != '\r' )
            || ( codePoint >= 0x7f && codePoint <= 0x9f ) )
    {
        hex = REPLACEMENT_HEX; // Let's entity encode this instead of returning it
        codePoint = REPLACEMENT_CHAR;
    }

    // check if there's a defined entity
    String entityName = characterToEntityMap.get(codePoint);
    if (entityName != null) {
        return "&" + entityName + ";";
    }

    // return the hex entity as suggested in the spec; the "&#x" prefix is
    // what makes the digits a hexadecimal character reference
    return "&#x" + hex + ";";
}
/**
 * {@inheritDoc}
 *
 * Returns the decoded version of the entity starting at the current position
 * of {@code input}, or null if no decoding is possible. When null is
 * returned the input has been reset to the position marked on entry.
 *
 * Recognized formats (the trailing semi-colon is optional for the numeric
 * forms, and the hex marker may be {@code x} or {@code X}):
 * <pre>
 *   {@code &#dddd;}
 *   {@code &#xhhhh;}
 *   {@code &name;}
 * </pre>
 *
 * @param input
 *            the sequence to read from
 * @return the decoded code point, or null if the input does not start with
 *         a decodable entity
 */
public Integer decodeCharacter( PushbackSequence input ) {
    input.mark();

    // an entity always starts with '&'
    final Integer lead = input.next();
    if ( lead == null || lead != '&' ) {
        input.reset();
        return null;
    }

    final Integer marker = input.next();
    Integer decoded = null;
    if ( marker != null ) {
        if ( marker == '#' ) {
            // numeric entity: the digits follow the '#'
            decoded = getNumericEntity( input );
        } else if ( Character.isLetter( marker ) ) {
            // named entity: hand the first letter back so the name is read whole
            input.pushback( marker );
            decoded = getNamedEntity( input );
        }
    }

    if ( decoded == null ) {
        input.reset();
    }
    return decoded;
}
/**
 * Decodes the remainder of a numeric entity, i.e. the part that follows
 * {@code &#}. A leading {@code x} or {@code X} is consumed and selects hex
 * parsing; anything else is parsed as decimal.
 *
 * @param input
 *            the input positioned just after {@code &#}
 *
 * @return
 *            the decoded code point, or null if there is no more input or
 *            the digits do not form a valid code point
 */
private Integer getNumericEntity( PushbackSequence input ) {
    final Integer lookahead = input.peek();
    if ( lookahead == null ) {
        return null;
    }

    final boolean hexMarker = lookahead == 'x' || lookahead == 'X';
    if ( !hexMarker ) {
        return parseNumber( input );
    }

    input.next(); // consume the x/X marker
    return parseHex( input );
}
/**
 * Parses the decimal digits of a numeric entity, such as the {@code 65} in
 * {@code &#65;}. Digits are consumed until a non-digit is seen; a
 * terminating semi-colon is consumed as well, any other terminator is left
 * in the input.
 *
 * @param input
 *            the input positioned at the first digit
 * @return
 *            the code point denoted by the digits (e.g. 65 for {@code A}),
 *            or null if there are no digits, the number does not fit in an
 *            {@code int}, or it is not a valid code point
 */
private Integer parseNumber( PushbackSequence input ) {
    final StringBuilder digits = new StringBuilder();
    while ( input.hasNext() ) {
        final Integer next = input.peek();
        if ( !( Character.isDigit( next ) && Character.isValidCodePoint( next ) ) ) {
            // end of the number: swallow a closing semi-colon, leave anything else
            if ( next == ';' ) {
                input.next();
            }
            break;
        }
        digits.appendCodePoint( next );
        input.next();
    }

    try {
        final int value = Integer.parseInt( digits.toString() );
        return Character.isValidCodePoint( value ) ? Integer.valueOf( value ) : null;
    } catch ( NumberFormatException ignored ) {
        // malformed or empty number: reported to the caller as "not decodable"
        return null;
    }
}
/**
 * Parses the hex digits of a numeric entity, such as the {@code 437ae} in
 * {@code &#x437ae;}. Digits are consumed until a non-hex character is seen;
 * a terminating semi-colon is consumed as well, any other terminator is left
 * in the input.
 *
 * @param input
 *            the input positioned at the first hex digit (after the x/X marker)
 * @return
 *            the code point denoted by the digits, or null if there are no
 *            digits, the number does not fit in an {@code int}, or it is not
 *            a valid code point
 */
private Integer parseHex( PushbackSequence input ) {
    final StringBuilder hexDigits = new StringBuilder();
    while ( input.hasNext() ) {
        final Integer next = input.peek();
        // membership in this ASCII set also implies the value is a valid code point
        if ( "0123456789ABCDEFabcdef".indexOf( next ) < 0 ) {
            // end of the number: swallow a closing semi-colon, leave anything else
            if ( next == ';' ) {
                input.next();
            }
            break;
        }
        hexDigits.appendCodePoint( next );
        input.next();
    }

    try {
        final int value = Integer.parseInt( hexDigits.toString(), 16 );
        return Character.isValidCodePoint( value ) ? Integer.valueOf( value ) : null;
    } catch ( NumberFormatException ignored ) {
        // malformed or empty number: reported to the caller as "not decodable"
        return null;
    }
}
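/**
*
* Returns the decoded version of the named entity starting at index, or
* null if no decoding is possible.
*
* Formats all are legal both with and without semi-colon, upper/lower case:
* &aa;
* &aaa;
* &aaaa;
* &aaaaa;
* &aaaaaa;
* &aaaaaaa;
*
* NOTE(review): the body of this method appears to be truncated. On both
* lines that begin {@code for(int i=0;i} the text from the '<' up to the
* next '>' is missing, which also swallowed the end of this method and the
* declaration of the method that follows it. Restore the original text
* before relying on this description.
*
* @param input
* A string containing a named entity like {@code &quot;}
* @return
* Returns the decoded version of the character starting at index, or null if no decoding is possible.
*/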
private Integer getNamedEntity( PushbackSequence input ) {
StringBuilder possible = new StringBuilder();
Entry entry;
int len;
// kludge around PushbackString....
len = Math.min(input.remainder().length(), entityToCharacterTrie.getMaxKeyLength());
for(int i=0;i exactEntry = entityToCharacterTrie.getLongestMatch(possibleStringLowerCase);
if(exactEntry != null) entry = exactEntry;
}
if(entry == null) return null; // no match, caller will reset input
}
// fixup input
input.reset();
input.next(); // read &
len = entry.getKey().length(); // what matched's length
for(int i=0;i mkCharacterToEntityMap()
{
Map map = new HashMap(252);
map.put(34, "quot"); /* quotation mark */
map.put(38, "amp"); /* ampersand */
map.put(60, "lt"); /* less-than sign */
map.put(62, "gt"); /* greater-than sign */
map.put(160, "nbsp"); /* no-break space */
map.put(161, "iexcl"); /* inverted exclamation mark */
map.put(162, "cent"); /* cent sign */
map.put(163, "pound"); /* pound sign */
map.put(164, "curren"); /* currency sign */
map.put(165, "yen"); /* yen sign */
map.put(166, "brvbar"); /* broken bar */
map.put(167, "sect"); /* section sign */
map.put(168, "uml"); /* diaeresis */
map.put(169, "copy"); /* copyright sign */
map.put(170, "ordf"); /* feminine ordinal indicator */
map.put(171, "laquo"); /* left-pointing double angle quotation mark */
map.put(172, "not"); /* not sign */
map.put(173, "shy"); /* soft hyphen */
map.put(174, "reg"); /* registered sign */
map.put(175, "macr"); /* macron */
map.put(176, "deg"); /* degree sign */
map.put(177, "plusmn"); /* plus-minus sign */
map.put(178, "sup2"); /* superscript two */
map.put(179, "sup3"); /* superscript three */
map.put(180, "acute"); /* acute accent */
map.put(181, "micro"); /* micro sign */
map.put(182, "para"); /* pilcrow sign */
map.put(183, "middot"); /* middle dot */
map.put(184, "cedil"); /* cedilla */
map.put(185, "sup1"); /* superscript one */
map.put(186, "ordm"); /* masculine ordinal indicator */
map.put(187, "raquo"); /* right-pointing double angle quotation mark */
map.put(188, "frac14"); /* vulgar fraction one quarter */
map.put(189, "frac12"); /* vulgar fraction one half */
map.put(190, "frac34"); /* vulgar fraction three quarters */
map.put(191, "iquest"); /* inverted question mark */
map.put(192, "Agrave"); /* Latin capital letter a with grave */
map.put(193, "Aacute"); /* Latin capital letter a with acute */
map.put(194, "Acirc"); /* Latin capital letter a with circumflex */
map.put(195, "Atilde"); /* Latin capital letter a with tilde */
map.put(196, "Auml"); /* Latin capital letter a with diaeresis */
map.put(197, "Aring"); /* Latin capital letter a with ring above */
map.put(198, "AElig"); /* Latin capital letter ae */
map.put(199, "Ccedil"); /* Latin capital letter c with cedilla */
map.put(200, "Egrave"); /* Latin capital letter e with grave */
map.put(201, "Eacute"); /* Latin capital letter e with acute */
map.put(202, "Ecirc"); /* Latin capital letter e with circumflex */
map.put(203, "Euml"); /* Latin capital letter e with diaeresis */
map.put(204, "Igrave"); /* Latin capital letter i with grave */
map.put(205, "Iacute"); /* Latin capital letter i with acute */
map.put(206, "Icirc"); /* Latin capital letter i with circumflex */
map.put(207, "Iuml"); /* Latin capital letter i with diaeresis */
map.put(208, "ETH"); /* Latin capital letter eth */
map.put(209, "Ntilde"); /* Latin capital letter n with tilde */
map.put(210, "Ograve"); /* Latin capital letter o with grave */
map.put(211, "Oacute"); /* Latin capital letter o with acute */
map.put(212, "Ocirc"); /* Latin capital letter o with circumflex */
map.put(213, "Otilde"); /* Latin capital letter o with tilde */
map.put(214, "Ouml"); /* Latin capital letter o with diaeresis */
map.put(215, "times"); /* multiplication sign */
map.put(216, "Oslash"); /* Latin capital letter o with stroke */
map.put(217, "Ugrave"); /* Latin capital letter u with grave */
map.put(218, "Uacute"); /* Latin capital letter u with acute */
map.put(219, "Ucirc"); /* Latin capital letter u with circumflex */
map.put(220, "Uuml"); /* Latin capital letter u with diaeresis */
map.put(221, "Yacute"); /* Latin capital letter y with acute */
map.put(222, "THORN"); /* Latin capital letter thorn */
map.put(223, "szlig"); /* Latin small letter sharp sXCOMMAX German Eszett */
map.put(224, "agrave"); /* Latin small letter a with grave */
map.put(225, "aacute"); /* Latin small letter a with acute */
map.put(226, "acirc"); /* Latin small letter a with circumflex */
map.put(227, "atilde"); /* Latin small letter a with tilde */
map.put(228, "auml"); /* Latin small letter a with diaeresis */
map.put(229, "aring"); /* Latin small letter a with ring above */
map.put(230, "aelig"); /* Latin lowercase ligature ae */
map.put(231, "ccedil"); /* Latin small letter c with cedilla */
map.put(232, "egrave"); /* Latin small letter e with grave */
map.put(233, "eacute"); /* Latin small letter e with acute */
map.put(234, "ecirc"); /* Latin small letter e with circumflex */
map.put(235, "euml"); /* Latin small letter e with diaeresis */
map.put(236, "igrave"); /* Latin small letter i with grave */
map.put(237, "iacute"); /* Latin small letter i with acute */
map.put(238, "icirc"); /* Latin small letter i with circumflex */
map.put(239, "iuml"); /* Latin small letter i with diaeresis */
map.put(240, "eth"); /* Latin small letter eth */
map.put(241, "ntilde"); /* Latin small letter n with tilde */
map.put(242, "ograve"); /* Latin small letter o with grave */
map.put(243, "oacute"); /* Latin small letter o with acute */
map.put(244, "ocirc"); /* Latin small letter o with circumflex */
map.put(245, "otilde"); /* Latin small letter o with tilde */
map.put(246, "ouml"); /* Latin small letter o with diaeresis */
map.put(247, "divide"); /* division sign */
map.put(248, "oslash"); /* Latin small letter o with stroke */
map.put(249, "ugrave"); /* Latin small letter u with grave */
map.put(250, "uacute"); /* Latin small letter u with acute */
map.put(251, "ucirc"); /* Latin small letter u with circumflex */
map.put(252, "uuml"); /* Latin small letter u with diaeresis */
map.put(253, "yacute"); /* Latin small letter y with acute */
map.put(254, "thorn"); /* Latin small letter thorn */
map.put(255, "yuml"); /* Latin small letter y with diaeresis */
map.put(338, "OElig"); /* Latin capital ligature oe */
map.put(339, "oelig"); /* Latin small ligature oe */
map.put(352, "Scaron"); /* Latin capital letter s with caron */
map.put(353, "scaron"); /* Latin small letter s with caron */
map.put(376, "Yuml"); /* Latin capital letter y with diaeresis */
map.put(402, "fnof"); /* Latin small letter f with hook */
map.put(710, "circ"); /* modifier letter circumflex accent */
map.put(732, "tilde"); /* small tilde */
map.put(913, "Alpha"); /* Greek capital letter alpha */
map.put(914, "Beta"); /* Greek capital letter beta */
map.put(915, "Gamma"); /* Greek capital letter gamma */
map.put(916, "Delta"); /* Greek capital letter delta */
map.put(917, "Epsilon"); /* Greek capital letter epsilon */
map.put(918, "Zeta"); /* Greek capital letter zeta */
map.put(919, "Eta"); /* Greek capital letter eta */
map.put(920, "Theta"); /* Greek capital letter theta */
map.put(921, "Iota"); /* Greek capital letter iota */
map.put(922, "Kappa"); /* Greek capital letter kappa */
map.put(923, "Lambda"); /* Greek capital letter lambda */
map.put(924, "Mu"); /* Greek capital letter mu */
map.put(925, "Nu"); /* Greek capital letter nu */
map.put(926, "Xi"); /* Greek capital letter xi */
map.put(927, "Omicron"); /* Greek capital letter omicron */
map.put(928, "Pi"); /* Greek capital letter pi */
map.put(929, "Rho"); /* Greek capital letter rho */
map.put(931, "Sigma"); /* Greek capital letter sigma */
map.put(932, "Tau"); /* Greek capital letter tau */
map.put(933, "Upsilon"); /* Greek capital letter upsilon */
map.put(934, "Phi"); /* Greek capital letter phi */
map.put(935, "Chi"); /* Greek capital letter chi */
map.put(936, "Psi"); /* Greek capital letter psi */
map.put(937, "Omega"); /* Greek capital letter omega */
map.put(945, "alpha"); /* Greek small letter alpha */
map.put(946, "beta"); /* Greek small letter beta */
map.put(947, "gamma"); /* Greek small letter gamma */
map.put(948, "delta"); /* Greek small letter delta */
map.put(949, "epsilon"); /* Greek small letter epsilon */
map.put(950, "zeta"); /* Greek small letter zeta */
map.put(951, "eta"); /* Greek small letter eta */
map.put(952, "theta"); /* Greek small letter theta */
map.put(953, "iota"); /* Greek small letter iota */
map.put(954, "kappa"); /* Greek small letter kappa */
map.put(955, "lambda"); /* Greek small letter lambda */
map.put(956, "mu"); /* Greek small letter mu */
map.put(957, "nu"); /* Greek small letter nu */
map.put(958, "xi"); /* Greek small letter xi */
map.put(959, "omicron"); /* Greek small letter omicron */
map.put(960, "pi"); /* Greek small letter pi */
map.put(961, "rho"); /* Greek small letter rho */
map.put(962, "sigmaf"); /* Greek small letter final sigma */
map.put(963, "sigma"); /* Greek small letter sigma */
map.put(964, "tau"); /* Greek small letter tau */
map.put(965, "upsilon"); /* Greek small letter upsilon */
map.put(966, "phi"); /* Greek small letter phi */
map.put(967, "chi"); /* Greek small letter chi */
map.put(968, "psi"); /* Greek small letter psi */
map.put(969, "omega"); /* Greek small letter omega */
map.put(977, "thetasym"); /* Greek theta symbol */
map.put(978, "upsih"); /* Greek upsilon with hook symbol */
map.put(982, "piv"); /* Greek pi symbol */
map.put(8194, "ensp"); /* en space */
map.put(8195, "emsp"); /* em space */
map.put(8201, "thinsp"); /* thin space */
map.put(8204, "zwnj"); /* zero width non-joiner */
map.put(8205, "zwj"); /* zero width joiner */
map.put(8206, "lrm"); /* left-to-right mark */
map.put(8207, "rlm"); /* right-to-left mark */
map.put(8211, "ndash"); /* en dash */
map.put(8212, "mdash"); /* em dash */
map.put(8216, "lsquo"); /* left single quotation mark */
map.put(8217, "rsquo"); /* right single quotation mark */
map.put(8218, "sbquo"); /* single low-9 quotation mark */
map.put(8220, "ldquo"); /* left double quotation mark */
map.put(8221, "rdquo"); /* right double quotation mark */
map.put(8222, "bdquo"); /* double low-9 quotation mark */
map.put(8224, "dagger"); /* dagger */
map.put(8225, "Dagger"); /* double dagger */
map.put(8226, "bull"); /* bullet */
map.put(8230, "hellip"); /* horizontal ellipsis */
map.put(8240, "permil"); /* per mille sign */
map.put(8242, "prime"); /* prime */
map.put(8243, "Prime"); /* double prime */
map.put(8249, "lsaquo"); /* single left-pointing angle quotation mark */
map.put(8250, "rsaquo"); /* single right-pointing angle quotation mark */
map.put(8254, "oline"); /* overline */
map.put(8260, "frasl"); /* fraction slash */
map.put(8364, "euro"); /* euro sign */
map.put(8465, "image"); /* black-letter capital i */
map.put(8472, "weierp"); /* script capital pXCOMMAX Weierstrass p */
map.put(8476, "real"); /* black-letter capital r */
map.put(8482, "trade"); /* trademark sign */
map.put(8501, "alefsym"); /* alef symbol */
map.put(8592, "larr"); /* leftwards arrow */
map.put(8593, "uarr"); /* upwards arrow */
map.put(8594, "rarr"); /* rightwards arrow */
map.put(8595, "darr"); /* downwards arrow */
map.put(8596, "harr"); /* left right arrow */
map.put(8629, "crarr"); /* downwards arrow with corner leftwards */
map.put(8656, "lArr"); /* leftwards double arrow */
map.put(8657, "uArr"); /* upwards double arrow */
map.put(8658, "rArr"); /* rightwards double arrow */
map.put(8659, "dArr"); /* downwards double arrow */
map.put(8660, "hArr"); /* left right double arrow */
map.put(8704, "forall"); /* for all */
map.put(8706, "part"); /* partial differential */
map.put(8707, "exist"); /* there exists */
map.put(8709, "empty"); /* empty set */
map.put(8711, "nabla"); /* nabla */
map.put(8712, "isin"); /* element of */
map.put(8713, "notin"); /* not an element of */
map.put(8715, "ni"); /* contains as member */
map.put(8719, "prod"); /* n-ary product */
map.put(8721, "sum"); /* n-ary summation */
map.put(8722, "minus"); /* minus sign */
map.put(8727, "lowast"); /* asterisk operator */
map.put(8730, "radic"); /* square root */
map.put(8733, "prop"); /* proportional to */
map.put(8734, "infin"); /* infinity */
map.put(8736, "ang"); /* angle */
map.put(8743, "and"); /* logical and */
map.put(8744, "or"); /* logical or */
map.put(8745, "cap"); /* intersection */
map.put(8746, "cup"); /* union */
map.put(8747, "int"); /* integral */
map.put(8756, "there4"); /* therefore */
map.put(8764, "sim"); /* tilde operator */
map.put(8773, "cong"); /* congruent to */
map.put(8776, "asymp"); /* almost equal to */
map.put(8800, "ne"); /* not equal to */
map.put(8801, "equiv"); /* identical toXCOMMAX equivalent to */
map.put(8804, "le"); /* less-than or equal to */
map.put(8805, "ge"); /* greater-than or equal to */
map.put(8834, "sub"); /* subset of */
map.put(8835, "sup"); /* superset of */
map.put(8836, "nsub"); /* not a subset of */
map.put(8838, "sube"); /* subset of or equal to */
map.put(8839, "supe"); /* superset of or equal to */
map.put(8853, "oplus"); /* circled plus */
map.put(8855, "otimes"); /* circled times */
map.put(8869, "perp"); /* up tack */
map.put(8901, "sdot"); /* dot operator */
map.put(8968, "lceil"); /* left ceiling */
map.put(8969, "rceil"); /* right ceiling */
map.put(8970, "lfloor"); /* left floor */
map.put(8971, "rfloor"); /* right floor */
map.put(9001, "lang"); /* left-pointing angle bracket */
map.put(9002, "rang"); /* right-pointing angle bracket */
map.put(9674, "loz"); /* lozenge */
map.put(9824, "spades"); /* black spade suit */
map.put(9827, "clubs"); /* black club suit */
map.put(9829, "hearts"); /* black heart suit */
map.put(9830, "diams"); /* black diamond suit */
return Collections.unmodifiableMap(map);
}
/**
 * Builds the reverse of {@code characterToEntityMap}: an unmodifiable Trie
 * keyed by entity name whose values are the corresponding code points.
 *
 * @return Unmodifiable trie from entity name to code point.
 */
private static synchronized Trie mkEntityToCharacterTrie()
{
    final Trie entityNames = new HashTrie();
    for (Map.Entry nameByCodePoint : characterToEntityMap.entrySet()) {
        // swap the roles: the map's value (name) becomes the key
        entityNames.put(nameByCodePoint.getValue(), nameByCodePoint.getKey());
    }
    final Trie readOnly = Trie.Util.unmodifiable(entityNames);
    return readOnly;
}
}