// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
// Copyright (c) 2012, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package org.owasp.html;
import java.io.IOException;
import javax.annotation.Nullable;
/** Encoders and decoders for HTML. */
public final class Encoding {
/**
 * Decodes HTML entities to produce a string containing only valid
 * Unicode scalar values, treating {@code s} as text that is not part of
 * an attribute value.
 *
 * @param s text/html
 * @return text/plain
 * @deprecated specify whether s is in an attribute value by calling
 *     {@link #decodeHtml(String, boolean)} instead.
 */
@Deprecated
public static String decodeHtml(String s) {
  // Legacy entry point: always decodes with inAttribute == false.
  return decodeHtml(s, false);
}
/**
 * Decodes HTML entities to produce a string containing only valid
 * Unicode scalar values.
 *
 * <p>When the input contains neither an ampersand nor a banned code unit
 * the very same {@code String} instance is returned without copying.
 *
 * @param s text/html
 * @param inAttribute is s in an attribute value?
 * @return text/plain
 */
public static String decodeHtml(String s, boolean inAttribute) {
  final int firstAmp = s.indexOf('&');
  final int safeLimit = longestPrefixOfGoodCodeunits(s);
  if (firstAmp < 0 && safeLimit < 0) {
    // Nothing to decode and nothing to strip.
    return s;
  }

  final int length = s.length();
  final StringBuilder decoded = new StringBuilder(length);

  // Copy the literal text between entities verbatim and let HtmlEntities
  // append the decoded form of each entity; its return value is the index
  // at which scanning of s resumes.
  int resumeAt = 0;
  for (int amp = firstAmp; amp >= 0; amp = s.indexOf('&', resumeAt)) {
    decoded.append(s, resumeAt, amp);
    resumeAt =
        HtmlEntities.appendDecodedEntity(s, amp, length, inAttribute, decoded);
  }
  decoded.append(s, resumeAt, length);

  // Stripping starts at whichever comes first: the first ampersand or the
  // first banned code unit.  A negative value means "not present".
  final int stripFrom;
  if (firstAmp < 0) {
    stripFrom = safeLimit;
  } else if (safeLimit < 0) {
    stripFrom = firstAmp;
  } else {
    stripFrom = Math.min(firstAmp, safeLimit);
  }
  stripBannedCodeunits(decoded, stripFrom);
  return decoded.toString();
}
/**
 * Returns the portion of its input that consists of XML safe chars.
 *
 * <p>The input instance itself is returned when it has no banned
 * code units.
 *
 * @param s the text to filter
 * @return s without its banned code units
 * @see <a href="https://www.w3.org/TR/xml/#charsets">XML Ch. 2.2 - Characters</a>
 */
@TCB
static String stripBannedCodeunits(String s) {
  final int firstBanned = longestPrefixOfGoodCodeunits(s);
  if (firstBanned >= 0) {
    // Everything before firstBanned is already known to be fine, so the
    // in-place filter only needs to look at the tail.
    StringBuilder filtered = new StringBuilder(s);
    stripBannedCodeunits(filtered, firstBanned);
    return filtered.toString();
  }
  return s;
}
/**
 * Leaves in the input buffer only code-units that comprise XML safe chars.
 * The buffer is modified in place.
 *
 * @param sb the buffer to filter
 * @see <a href="https://www.w3.org/TR/xml/#charsets">XML Ch. 2.2 - Characters</a>
 */
@TCB
static void stripBannedCodeunits(StringBuilder sb) {
// Filter the whole buffer, i.e. start scanning at index 0.
stripBannedCodeunits(sb, 0);
}
/**
 * Compacts {@code sb} in place from index {@code start} onwards, keeping
 * only the code units that pass the checks below, and then truncates the
 * buffer to the compacted length.  Code units before {@code start} are
 * not examined.
 *
 * @param sb the buffer to filter; modified in place
 * @param start index of the first code unit to examine
 */
@TCB
private static void stripBannedCodeunits(StringBuilder sb, int start) {
  final int length = sb.length();
  int writeAt = start;  // Next slot to receive a kept code unit.
  int readAt = start;   // Next code unit to examine.
  while (readAt < length) {
    final char unit = sb.charAt(readAt++);
    final boolean keep;
    if (unit < 0x20) {
      // Low ASCII controls: the lookup table says which ones are banned.
      keep = !IS_BANNED_ASCII[unit];
    } else if (unit < 0xd800) {
      keep = true;
    } else if (unit <= 0xdfff) {
      // A surrogate survives only as the lead of a well-formed pair, in
      // which case both halves are copied together.  readAt already
      // points at the code unit following the current one.
      if (readAt < length) {
        final char following = sb.charAt(readAt);
        if (Character.isSurrogatePair(unit, following)) {
          sb.setCharAt(writeAt++, unit);
          sb.setCharAt(writeAt++, following);
          ++readAt;
        }
      }
      keep = false;
    } else {
      // Above the surrogate range only 0xfffe and 0xffff are dropped.
      keep = (unit & 0xfffe) != 0xfffe;
    }
    if (keep) {
      sb.setCharAt(writeAt++, unit);
    }
  }
  sb.setLength(writeAt);
}
/**
 * The number of code-units at the front of s that form code-points in the
 * XML Character production, i.e. the index of the first offending
 * code unit.
 *
 * @param s the text to scan
 * @return -1 if all of s is in the XML Character production.
 */
@TCB
private static int longestPrefixOfGoodCodeunits(String s) {
  final int length = s.length();
  int idx = 0;
  while (idx < length) {
    final char unit = s.charAt(idx);
    if (unit < 0x20) {
      // Low ASCII controls: the lookup table says which ones are banned.
      if (IS_BANNED_ASCII[unit]) {
        return idx;
      }
    } else if (0xd800 <= unit && unit <= 0xdfff) {
      final boolean startsPair =
          idx + 1 < length && Character.isSurrogatePair(unit, s.charAt(idx + 1));
      if (!startsPair) {
        return idx;
      }
      ++idx;  // Step over the low surrogate as well; the pair is fine.
    } else if (unit > 0xdfff && (unit & 0xfffe) == 0xfffe) {
      // 0xfffe and 0xffff.
      return idx;
    }
    ++idx;
  }
  return -1;
}
/**
 * Appends an encoded form of plainText to output where the encoding is
 * sufficient to prevent an HTML parser from interpreting any characters in
 * the appended chunk as part of an attribute or tag boundary.
 *
 * @param plainText text/plain
 * @param output a buffer of text/html that has a well-formed HTML prefix that
 *     ends after the open-quote of an attribute value and does not yet contain
 *     a corresponding close quote.
 *     Modified in place.
 * @throws IOException presumably when appending to output fails; confirm
 *     against encodeHtmlOnto.
 */
static void encodeHtmlAttribOnto(String plainText, Appendable output)
throws IOException {
// The third argument is a '{' followed by U+200B (ZERO WIDTH SPACE).
// NOTE(review): presumably encodeHtmlOnto substitutes it for a brace so
// that "{{" never appears verbatim in the output; confirm against
// encodeHtmlOnto.
encodeHtmlOnto(plainText, output, "{\u200B");
}
/**
 * Appends an encoded form of plainText to output where the encoding is
 * sufficient to prevent an HTML parser from transitioning out of the
 * <a href="https://html.spec.whatwg.org/multipage/parsing.html#data-state">
 * Data state</a>.
 *
 * This is suitable for encoding a text node inside any element that does not
 * require special handling as a context element (see "context element" in
 * <a href="https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments">
 * step 4</a>.)
 *
 * @param plainText text/plain
 * @param output a buffer of text/html that has a well-formed HTML prefix that
 *     would leave an HTML parser in the Data state if it were to encounter a
 *     space character as the next character.  In practice this means that the
 *     buffer does not contain partial tags or comments, and does not have an
 *     unclosed element with a special content model.
 *     Modified in place.
 * @throws IOException presumably when appending to output fails; confirm
 *     against encodeHtmlOnto.
 */
static void encodePcdataOnto(String plainText, Appendable output)
    throws IOException {
  // Avoid problems with client-side template languages like
  // Angular & Polymer which attach special significance to text like
  // {{...}}.
  // We split brackets so that these template languages don't end up
  // executing expressions in sanitized text.
  //
  // The replacement has to differ from a bare "{" or nothing is split.
  // An empty HTML comment after the brace separates adjacent braces in the
  // markup without contributing any characters to the text node.
  // NOTE(review): this assumes encodeHtmlOnto substitutes its third
  // argument for a '{' that could form a "{{" pair, as the attribute
  // encoder's '{' + U+200B argument suggests; confirm against
  // encodeHtmlOnto.
  encodeHtmlOnto(plainText, output, "{<!-- -->");
}
/**
* Appends an encoded form of plainText to putput where the encoding is
* sufficient to prevent an HTML parser from transitioning out of the
*
* RCDATA state.
*
* This is suitable for encoding a text node inside a {@code