com.sun.faces.util.HtmlUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of javax.faces Show documentation
Show all versions of javax.faces Show documentation
This is the master POM file for Oracle's Implementation of the JSF 2.2 Specification.
/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright (c) 1997-2010 Oracle and/or its affiliates. All rights reserved.
*
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common Development
* and Distribution License("CDDL") (collectively, the "License"). You
* may not use this file except in compliance with the License. You can
* obtain a copy of the License at
* https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
* or packager/legal/LICENSE.txt. See the License for the specific
* language governing permissions and limitations under the License.
*
* When distributing the software, include this License Header Notice in each
* file and include the License file at packager/legal/LICENSE.txt.
*
* GPL Classpath Exception:
* Oracle designates this particular file as subject to the "Classpath"
* exception as provided by Oracle in the GPL Version 2 section of the License
* file that accompanied this code.
*
* Modifications:
* If applicable, add the following below the License Header, with the fields
* enclosed by brackets [] replaced by your own identifying information:
* "Portions Copyright [year] [name of copyright owner]"
*
* Contributor(s):
* If you wish your version of this file to be governed by only the CDDL or
* only the GPL Version 2, indicate your decision by adding "[Contributor]
* elects to include this software in this distribution under the [CDDL or GPL
* Version 2] license." If you don't indicate a single choice of license, a
* recipient has the option to distribute your version of this file under
* either the CDDL, the GPL Version 2 or to extend the choice of license to
* its licensees as provided above. However, if you add GPL Version 2 code
* and therefore, elected the GPL Version 2 license, then the option applies
* only if the new code is made subject to such option by the copyright
* holder.
*/
package com.sun.faces.util;
import com.sun.faces.config.WebConfiguration;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.BitSet;
import java.util.Set;
import java.util.HashSet;
import java.util.Arrays;
/**
* Utility class for HTML.
* Kudos to Adam Winer (Oracle) for much of this code.
*/
public class HtmlUtils {
private final static Set UTF_CHARSET = new HashSet(Arrays.asList("UTF-8", "UTF-16",
"UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE", "x-UTF-16LE-BOM", "X-UTF-32BE-BOM",
"X-UTF-32LE-BOM", ""));
//-------------------------------------------------
// The following methods include the handling of
// escape characters....
//-------------------------------------------------
static public void writeText(Writer out,
boolean escapeUnicode,
boolean escapeIsocode, char[] buffer,
char[] text) throws IOException {
writeText(out, escapeUnicode, escapeIsocode, buffer, text, 0, text.length);
}
/**
* Write char array text.
*/
static public void writeText(Writer out,
boolean escapeUnicode,
boolean escapeIsocode, char[] buff,
char[] text,
int start,
int length) throws IOException {
int buffLength = buff.length;
int buffIndex = 0;
int end = start + length;
for (int i = start; i < end; i++) {
buffIndex = writeTextChar(out, escapeUnicode, escapeIsocode, text[i], buffIndex, buff, buffLength);
}
flushBuffer(out, buff, buffIndex);
}
/**
* Write String text.
*/
static public void writeText(Writer out,
boolean escapeUnicode,
boolean escapeIsocode, char[] buff,
String text,
char[] textBuff) throws IOException {
int length = text.length();
if (length >= 16) {
text.getChars(0, length, textBuff, 0);
writeText(out, escapeUnicode, escapeIsocode, buff, textBuff, 0, length);
} else {
int buffLength = buff.length;
int buffIndex = 0;
for (int i = 0; i < length; i++) {
char ch = text.charAt(i);
buffIndex = writeTextChar(out, escapeUnicode, escapeIsocode, ch, buffIndex, buff, buffLength);
}
flushBuffer(out, buff, buffIndex);
}
}
private static int writeTextChar(Writer out,
boolean escapeUnicode,
boolean escapeIsocode,
char ch,
int buffIndex,
char[] buff,
int buffLength) throws IOException {
int nextIndex;
if (ch <= 0x1f) {
if (!isPrintableControlChar(ch)) {
return buffIndex;
}
}
if (ch < 0xA0) {
// If "?" or over, no escaping is needed (this covers
// most of the Latin alphabet)
if (ch >= 0x3f) {
nextIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
} else if (ch >= 0x27) { // If above "'"...
// If between "'" and ";", no escaping is needed
if (ch < 0x3c) {
nextIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
} else if (ch == '<') {
nextIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
LT_CHARS);
} else if (ch == '>') {
nextIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
GT_CHARS);
} else {
nextIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
} else {
if (ch == '&') {
nextIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
AMP_CHARS);
} else if (ch == '"') {
nextIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
QUOT_CHARS);
} else {
nextIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
}
} else if (ch <= 0xff) {
if (escapeIsocode) {
// ISO-8859-1 entities: encode as needed
nextIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
sISO8859_1_Entities[ch - 0xA0]);
}
else {
nextIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
} else {
if(escapeUnicode) {
// UNICODE entities: encode as needed
nextIndex =
_writeDecRef(out, buff, buffIndex, buffLength, ch);
} else {
nextIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
}
return nextIndex;
}
/**
* Write a string attribute. Note that this code
* is duplicated below for character arrays - change both
* places if you make any changes!!!
*/
static public void writeAttribute(Writer out,
boolean escapeUnicode,
boolean escapeIsocode,
char[] buff,
String text,
char[] textBuff,
boolean isScriptInAttributeValueEnabled) throws IOException {
int length = text.length();
if (length >= 16) {
if (length > textBuff.length) {
// resize our buffer
textBuff = new char[length * 2];
}
text.getChars(0, length, textBuff, 0);
writeAttribute(out, escapeUnicode, escapeIsocode, buff, textBuff, 0, length,
isScriptInAttributeValueEnabled);
} else {
int buffLength = buff.length;
int buffIndex = 0;
for (int i = 0; i < length; i++) {
char ch = text.charAt(i);
if (ch <= 0x1f) {
if (!isPrintableControlChar(ch)) {
continue;
}
}
// Tilde or less...
if (ch < 0xA0) {
// If "?" or over, no escaping is needed (this covers
// most of the Latin alphabet)
if (ch >= 0x3f) {
if (ch == 's') {
// If putting scripts in attribute values
// has been disabled (the defualt), look for
// script: in the attribute value.
// ensure the attribute value is long enough
// to accomodate "script:"
if (!isScriptInAttributeValueEnabled &&
((i + 6) < text.length())) {
if ('c' == text.charAt(i + 1) &&
'r' == text.charAt(i + 2) &&
'i' == text.charAt(i + 3) &&
'p' == text.charAt(i + 4) &&
't' == text.charAt(i + 5) &&
':' == text.charAt(i + 6)) {
return;
}
}
}
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
} else if (ch >= 0x27) { // If above "'"...
// If between "'" and ";", no escaping is needed
if (ch < 0x3c) {
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
} else if (ch == '<') {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
LT_CHARS);
} else if (ch == '>') {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
GT_CHARS);
} else {
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
} else {
if (ch == '&') {
// HTML 4.0, section B.7.1: ampersands followed by
// an open brace don't get escaped
if ((i + 1 < length) && (text.charAt(i + 1)
== '{')) {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
ch);
} else {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
AMP_CHARS);
}
} else if (ch == '"') {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
QUOT_CHARS);
} else {
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
}
} else if (ch <= 0xff) {
if (escapeIsocode) {
// ISO-8859-1 entities: encode as needed
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
sISO8859_1_Entities[ch - 0xA0]);
} else {
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
} else {
if(escapeUnicode) {
// UNICODE entities: encode as needed
buffIndex =
_writeDecRef(out, buff, buffIndex, buffLength, ch);
} else {
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
}
}
flushBuffer(out, buff, buffIndex);
}
}
static public void writeAttribute(Writer out,
boolean escapeUnicode,
boolean escapeIsocode,
char[] buffer,
char[] text) throws IOException {
writeAttribute(out, escapeUnicode, escapeIsocode, buffer, text, 0, text.length,
WebConfiguration.BooleanWebContextInitParameter.EnableScriptInAttributeValue.getDefaultValue());
}
/**
* Write a character array attribute. Note that this code
* is duplicated above for string - change both places if you make
* any changes!!!
*/
static public void writeAttribute(Writer out,
boolean escapeUnicode,
boolean escapeIsocode,
char[] buff,
char[] text,
int start,
int length,
boolean isScriptInAttributeValueEnabled) throws IOException {
int buffLength = buff.length;
int buffIndex = 0;
int end = start + length;
for (int i = start; i < end; i++) {
char ch = text[i];
// "Application Program Command" or less...
if (ch <= 0x1f) {
if (!isPrintableControlChar(ch)) {
continue;
}
}
if (ch < 0xA0) {
// If "?" or over, no escaping is needed (this covers
// most of the Latin alphabet)
if (ch >= 0x3f) {
if (ch == 's') {
// If putting scripts in attribute values
// has been disabled (the defualt), look for
// script: in the attribute value.
// ensure the attribute value is long enough
// to accomodate "script:"
if (!isScriptInAttributeValueEnabled &&
((i + 6) < text.length)) {
if ('c' == text[i + 1] &&
'r' == text[i + 2] &&
'i' == text[i + 3] &&
'p' == text[i + 4] &&
't' == text[i + 5] &&
':' == text[i + 6]) {
return;
}
}
}
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
} else if (ch >= 0x27) { // If above "'"...
if (ch < 0x3c) {
// If between "'" and ";", no escaping is needed
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
} else if (ch == '<') {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
LT_CHARS);
} else if (ch == '>') {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
GT_CHARS);
} else {
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
} else {
if (ch == '&') {
// HTML 4.0, section B.7.1: ampersands followed by
// an open brace don't get escaped
if ((i + 1 < end) && (text[i + 1] == '{')) {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
ch);
} else {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
AMP_CHARS);
}
} else if (ch == '"') {
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
QUOT_CHARS);
} else {
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
}
} else if (ch <= 0xff) {
if (escapeIsocode) {
// ISO-8859-1 entities: encode as needed
buffIndex = addToBuffer(out,
buff,
buffIndex,
buffLength,
sISO8859_1_Entities[ch - 0xA0]);
}
else {
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
} else {
if(escapeUnicode) {
// UNICODE entities: encode as needed
buffIndex = _writeDecRef(out, buff, buffIndex, buffLength, ch);
} else {
buffIndex = addToBuffer(out, buff, buffIndex,
buffLength, ch);
}
}
}
flushBuffer(out, buff, buffIndex);
}
static private boolean isPrintableControlChar(int ch) {
return (ch == 0x09 || ch == 0x0A || ch == 0x0C || ch == 0x0D);
}
/**
* Writes a character as a decimal escape. Hex escapes are smaller than
* the decimal version, but Netscape didn't support hex escapes until
* 4.7.4.
*/
static private int _writeDecRef(Writer out,
char[] buffer,
int bufferIndex,
int bufferLength,
char ch) throws IOException {
if (ch == '\u20ac') {
bufferIndex = addToBuffer(out,
buffer,
bufferIndex,
bufferLength,
EURO_CHARS);
return bufferIndex;
}
bufferIndex = addToBuffer(out,
buffer,
bufferIndex,
bufferLength,
DEC_REF_START);
// Formerly used String.valueOf(). This version tests out
// about 40% faster in a microbenchmark (and on systems where GC is
// going gonzo, it should be even better)
int i = (int) ch;
if (i > 10000) {
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + (i / 10000))));
i = i % 10000;
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + (i / 1000))));
i = i % 1000;
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + (i / 100))));
i = i % 100;
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + (i / 10))));
i = i % 10;
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + i)));
} else if (i > 1000) {
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + (i / 1000))));
i = i % 1000;
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + (i / 100))));
i = i % 100;
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + (i / 10))));
i = i % 10;
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + i)));
} else {
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + (i / 100))));
i = i % 100;
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + (i / 10))));
i = i % 10;
bufferIndex = addToBuffer(out, buffer, bufferIndex, bufferLength, ((char) ('0' + i)));
}
return addToBuffer(out, buffer, bufferIndex, bufferLength, ';');
}
//
// Buffering scheme: we use a tremendously simple buffering
// scheme that greatly reduces the number of calls into the
// Writer/PrintWriter. In practice this has produced significant
// measured performance gains (at least in JDK 1.3.1).
//
/**
* Add a character to the buffer, flushing the buffer if the buffer is
* full, and returning the new buffer index
*/
private static int addToBuffer(Writer out,
char[] buffer,
int bufferIndex,
int bufferLength,
char ch) throws IOException {
if (bufferIndex >= bufferLength) {
out.write(buffer, 0, bufferIndex);
bufferIndex = 0;
}
buffer[bufferIndex] = ch;
return bufferIndex + 1;
}
/**
* Add an array of characters to the buffer, flushing the buffer
* if the buffer is full, and returning the new buffer index.
*/
private static int addToBuffer(Writer out,
char[] buffer,
int bufferIndex,
int bufferLength,
char[] toAdd) throws IOException {
if (bufferIndex >= bufferLength
|| (toAdd.length + bufferIndex >= bufferLength)) {
out.write(buffer, 0, bufferIndex);
bufferIndex = 0;
}
System.arraycopy(toAdd, 0, buffer, bufferIndex, toAdd.length);
return bufferIndex + toAdd.length;
}
/**
* Flush the contents of the buffer to the output stream
* and return the reset buffer index
*/
private static int flushBuffer(Writer out,
char[] buffer,
int bufferIndex) throws IOException {
if (bufferIndex > 0)
out.write(buffer, 0, bufferIndex);
return 0;
}
private HtmlUtils() {
}
/**
* Writes a string into URL-encoded format out to a Writer.
*
* All characters before the start of the query string will be encoded
* using ISO-8859-1.
* PENDING: Ideally, we'd encode characters before the query string
* using UTF-8, which is what the HTML spec recommends. Unfortunately,
* the Apache server doesn't support this until 2.0.
*
* Characters after the start of the query string will be encoded
* using a client-defined encoding. You'll need to use the encoding
* that the server will expect. (HTML forms will generate query
* strings using the character encoding that the HTML itself was
* generated in.)
*
* All characters will be encoded as needed for URLs, with the exception
* of the percent symbol ("%"). Because this is the character
* itself used for escaping, attempting to escape this character
* would cause this code to double-escape some strings. It also may
* be necessary to pre-escape some characters. In particular, a
* question mark ("?") is considered the start of the query string.
*
*
*
* NOTE: This is method is duplicated below. The difference being
* the acceptance of a char[] for the text to write. Any changes made
* here, should be made below.
*
*
* @param out a Writer for the output
* @param text the unencoded (or partially encoded) String
* @param queryEncoding the character set encoding for after the first
* question mark
*/
static public void writeURL(Writer out,
String text,
char[] textBuff,
String queryEncoding)
throws IOException, UnsupportedEncodingException {
int length = text.length();
if (length >= 16) {
text.getChars(0, length, textBuff, 0);
writeURL(out, textBuff, 0, length, queryEncoding);
} else {
for (int i = 0; i < length; i++) {
char ch = text.charAt(i);
if ((ch < 33) || (ch > 126)) {
if (ch == ' ') {
out.write('+');
} else {
// ISO-8859-1. Blindly assume the character will be < 255.
// Not much we can do if it isn't.
writeURIDoubleHex(out, ch);
}
}
// DO NOT encode '%'. If you do, then for starters,
// we'll double-encode anything that's pre-encoded.
// And, what's worse, there becomes no way to use
// characters that must be encoded if you
// don't want them to be interpreted, like '?' or '&'.
// else if('%' == ch)
// {
// writeURIDoubleHex(out, ch);
// }
else if (ch == '"') {
out.write("%22");
}
// Everything in the query parameters will be decoded
// as if it were in the request's character set. So use
// the real encoding for those!
else if (ch == '?') {
out.write('?');
encodeURIString(out,
text,
queryEncoding,
i + 1);
return;
} else {
out.write(ch);
}
}
}
}
/**
* Writes a string into URL-encoded format out to a Writer.
*
* All characters before the start of the query string will be encoded
* using ISO-8859-1.
* PENDING: Ideally, we'd encode characters before the query string
* using UTF-8, which is what the HTML spec recommends. Unfortunately,
* the Apache server doesn't support this until 2.0.
*
* Characters after the start of the query string will be encoded
* using a client-defined encoding. You'll need to use the encoding
* that the server will expect. (HTML forms will generate query
* strings using the character encoding that the HTML itself was
* generated in.)
*
* All characters will be encoded as needed for URLs, with the exception
* of the percent symbol ("%"). Because this is the character
* itself used for escaping, attempting to escape this character
* would cause this code to double-escape some strings. It also may
* be necessary to pre-escape some characters. In particular, a
* question mark ("?") is considered the start of the query string.
*
*
* NOTE: This is method is duplicated above. The difference being
* the acceptance of a String for the text to write. Any changes made
* here, should be made above.
*
*
* @param out a Writer for the output
* @param textBuff char[] containing the content to write
* @param queryEncoding the character set encoding for after the first
* question mark
*/
static public void writeURL(Writer out,
char[] textBuff,
int start,
int len,
String queryEncoding)
throws IOException, UnsupportedEncodingException {
int end = start + len;
for (int i = start; i < end; i++) {
char ch = textBuff[i];
if ((ch < 33) || (ch > 126)) {
if (ch == ' ') {
out.write('+');
} else {
// ISO-8859-1. Blindly assume the character will be < 255.
// Not much we can do if it isn't.
writeURIDoubleHex(out, ch);
}
}
// DO NOT encode '%'. If you do, then for starters,
// we'll double-encode anything that's pre-encoded.
// And, what's worse, there becomes no way to use
// characters that must be encoded if you
// don't want them to be interpreted, like '?' or '&'.
// else if('%' == ch)
// {
// writeURIDoubleHex(out, ch);
// }
else if (ch == '"') {
out.write("%22");
}
// Everything in the query parameters will be decoded
// as if it were in the request's character set. So use
// the real encoding for those!
else if (ch == '?') {
out.write('?');
encodeURIString(out,
textBuff,
queryEncoding,
i + 1,
end);
return;
} else {
out.write(ch);
}
}
}
static public void writeTextForXML(Writer out, String text, char[] outbuf)
throws IOException {
char[] textBuffer = new char[128];
int len = text.toString().length();
if (textBuffer.length < len) {
textBuffer = new char[len * 2];
}
HtmlUtils.writeText(out, true, true, outbuf, text, textBuffer);
}
// Encode a String into URI-encoded form. This code will
// appear rather (ahem) similar to java.net.URLEncoder
// This is duplicated below accepting a char[] for the content
// to write. Any changes here, should be made there as well.
static private void encodeURIString(Writer out,
String text,
String encoding,
int start)
throws IOException {
MyByteArrayOutputStream buf = null;
OutputStreamWriter writer = null;
char[] charArray = null;
int length = text.length();
for (int i = start; i < length; i++) {
char ch = text.charAt(i);
if (DONT_ENCODE_SET.get(ch)) {
if (ch == '&') {
if (((i + 1) < length) && isAmpEscaped(text, i + 1)) {
out.write(ch);
continue;
}
out.write(AMP_CHARS);
} else {
out.write(ch);
}
} else {
if (buf == null) {
buf = new MyByteArrayOutputStream(MAX_BYTES_PER_CHAR);
if (encoding != null) {
writer = new OutputStreamWriter(buf, encoding);
} else {
writer = new OutputStreamWriter(buf);
}
charArray = new char[1];
}
// convert to external encoding before hex conversion
try {
// An inspection of OutputStreamWriter reveals
// that write(char) always allocates a one element
// character array. We can reuse our own.
charArray[0] = ch;
writer.write(charArray, 0, 1);
writer.flush();
} catch (IOException e) {
buf.reset();
continue;
}
byte[] ba = buf.getBuf();
for (int j = 0, size = buf.size(); j < size; j++) {
writeURIDoubleHex(out, ba[j] + 256);
}
buf.reset();
}
}
}
// Encode a String into URI-encoded form. This code will
// appear rather (ahem) similar to java.net.URLEncoder
// This is duplicated above accepting a String for the content
// to write. Any changes here, should be made there as well.
static private void encodeURIString(Writer out,
char[] textBuff,
String encoding,
int start,
int end)
throws IOException {
MyByteArrayOutputStream buf = null;
OutputStreamWriter writer = null;
char[] charArray = null;
for (int i = start; i < end; i++) {
char ch = textBuff[i];
if (DONT_ENCODE_SET.get(ch)) {
if (ch == '&') {
if (((i + 1) < end) && isAmpEscaped(textBuff, i + 1)) {
out.write(ch);
continue;
}
out.write(AMP_CHARS);
} else {
out.write(ch);
}
} else {
if (buf == null) {
buf = new MyByteArrayOutputStream(MAX_BYTES_PER_CHAR);
if (encoding != null) {
writer = new OutputStreamWriter(buf, encoding);
} else {
writer = new OutputStreamWriter(buf);
}
charArray = new char[1];
}
// convert to external encoding before hex conversion
try {
// An inspection of OutputStreamWriter reveals
// that write(char) always allocates a one element
// character array. We can reuse our own.
charArray[0] = ch;
writer.write(charArray, 0, 1);
writer.flush();
} catch (IOException e) {
buf.reset();
continue;
}
byte[] ba = buf.getBuf();
for (int j = 0, size = buf.size(); j < size; j++) {
writeURIDoubleHex(out, ba[j] + 256);
}
buf.reset();
}
}
}
// NOTE: Any changes made to this method should be made
// in the associated method that accepts a char[] instead
// of String
static private boolean isAmpEscaped(String text, int idx) {
for (int i = 1, ix = idx; i < AMP_CHARS.length; i++, ix++) {
if (text.charAt(ix) == AMP_CHARS[i]) {
continue;
}
return false;
}
return true;
}
// NOTE: Any changes made to this method should be made
// in the associated method that accepts a String instead
// of char[]
static private boolean isAmpEscaped(char[] text, int idx) {
for (int i = 1, ix = idx; i < AMP_CHARS.length; i++, ix++) {
if (text[ix] == AMP_CHARS[i]) {
continue;
}
return false;
}
return true;
}
static private void writeURIDoubleHex(Writer out,
int i) throws IOException {
out.write('%');
out.write(intToHex((i >> 4) % 0x10));
out.write(intToHex(i % 0x10));
}
static private char intToHex(int i) {
if (i < 10)
return ((char) ('0' + i));
else
return ((char) ('A' + (i - 10)));
}
static private final char[] AMP_CHARS = "&".toCharArray();
static private final char[] QUOT_CHARS = """.toCharArray();
static private final char[] GT_CHARS = ">".toCharArray();
static private final char[] LT_CHARS = "<".toCharArray();
static private final char[] EURO_CHARS = "€".toCharArray();
static private final char[] DEC_REF_START = "".toCharArray();
static private final int MAX_BYTES_PER_CHAR = 10;
static private final BitSet DONT_ENCODE_SET = new BitSet(256);
// See: http://www.ietf.org/rfc/rfc2396.txt
// We're not fully along for that ride either, but we do encode
// ' ' as '%20', and don't bother encoding '~' or '/'
static {
for (int i = 'a'; i <= 'z'; i++) {
DONT_ENCODE_SET.set(i);
}
for (int i = 'A'; i <= 'Z'; i++) {
DONT_ENCODE_SET.set(i);
}
for (int i = '0'; i <= '9'; i++) {
DONT_ENCODE_SET.set(i);
}
// Don't encode '%' - we don't want to double encode anything.
DONT_ENCODE_SET.set('%');
// Ditto for '+', which is an encoded space
DONT_ENCODE_SET.set('+');
DONT_ENCODE_SET.set('#');
DONT_ENCODE_SET.set('&');
DONT_ENCODE_SET.set('=');
DONT_ENCODE_SET.set('-');
DONT_ENCODE_SET.set('_');
DONT_ENCODE_SET.set('.');
DONT_ENCODE_SET.set('*');
DONT_ENCODE_SET.set('~');
DONT_ENCODE_SET.set('/');
DONT_ENCODE_SET.set('\'');
DONT_ENCODE_SET.set('!');
DONT_ENCODE_SET.set('(');
DONT_ENCODE_SET.set(')');
DONT_ENCODE_SET.set(';');
}
//
// Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF
//
static private char[][] sISO8859_1_Entities = new char[][]{
" ".toCharArray(),
"¡".toCharArray(),
"¢".toCharArray(),
"£".toCharArray(),
"¤".toCharArray(),
"¥".toCharArray(),
"¦".toCharArray(),
"§".toCharArray(),
"¨".toCharArray(),
"©".toCharArray(),
"ª".toCharArray(),
"«".toCharArray(),
"¬".toCharArray(),
"".toCharArray(),
"®".toCharArray(),
"¯".toCharArray(),
"°".toCharArray(),
"±".toCharArray(),
"²".toCharArray(),
"³".toCharArray(),
"´".toCharArray(),
"µ".toCharArray(),
"¶".toCharArray(),
"·".toCharArray(),
"¸".toCharArray(),
"¹".toCharArray(),
"º".toCharArray(),
"»".toCharArray(),
"¼".toCharArray(),
"½".toCharArray(),
"¾".toCharArray(),
"¿".toCharArray(),
"À".toCharArray(),
"Á".toCharArray(),
"Â".toCharArray(),
"Ã".toCharArray(),
"Ä".toCharArray(),
"Å".toCharArray(),
"Æ".toCharArray(),
"Ç".toCharArray(),
"È".toCharArray(),
"É".toCharArray(),
"Ê".toCharArray(),
"Ë".toCharArray(),
"Ì".toCharArray(),
"Í".toCharArray(),
"Î".toCharArray(),
"Ï".toCharArray(),
"Ð".toCharArray(),
"Ñ".toCharArray(),
"Ò".toCharArray(),
"Ó".toCharArray(),
"Ô".toCharArray(),
"Õ".toCharArray(),
"Ö".toCharArray(),
"×".toCharArray(),
"Ø".toCharArray(),
"Ù".toCharArray(),
"Ú".toCharArray(),
"Û".toCharArray(),
"Ü".toCharArray(),
"Ý".toCharArray(),
"Þ".toCharArray(),
"ß".toCharArray(),
"à".toCharArray(),
"á".toCharArray(),
"â".toCharArray(),
"ã".toCharArray(),
"ä".toCharArray(),
"å".toCharArray(),
"æ".toCharArray(),
"ç".toCharArray(),
"è".toCharArray(),
"é".toCharArray(),
"ê".toCharArray(),
"ë".toCharArray(),
"ì".toCharArray(),
"í".toCharArray(),
"î".toCharArray(),
"ï".toCharArray(),
"ð".toCharArray(),
"ñ".toCharArray(),
"ò".toCharArray(),
"ó".toCharArray(),
"ô".toCharArray(),
"õ".toCharArray(),
"ö".toCharArray(),
"÷".toCharArray(),
"ø".toCharArray(),
"ù".toCharArray(),
"ú".toCharArray(),
"û".toCharArray(),
"ü".toCharArray(),
"ý".toCharArray(),
"þ".toCharArray(),
"ÿ".toCharArray()
};
//----------------------------------------------------------
// The following is used to verify encodings
//----------------------------------------------------------
//
static public boolean validateEncoding(String encoding) {
return Charset.isSupported(encoding);
}
//----------------------------------------------------------
// Check if the given encoding is the ISO-8859-1 encoding
//----------------------------------------------------------
//
static public boolean isISO8859_1encoding(String encoding) {
return "ISO-8859-1".equals(encoding);
}
//----------------------------------------------------------
// Check if the given encoding is a UTF encoding
//----------------------------------------------------------
//
static public boolean isUTFencoding(String encoding) {
return UTF_CHARSET.contains(encoding);
}
//----------------------------------------------------------
// The following is used to verify "empty" Html elements.
// "Empty" Html elements are those that do not require an
// ending tag. For example,
or
...
//----------------------------------------------------------
static public boolean isEmptyElement(String name) {
char firstChar = name.charAt(0);
if (firstChar > _LAST_EMPTY_ELEMENT_START)
return false;
// Can we improve performance here? It's certainly slower to use
// a HashMap, at least if we can't assume the input name is lowercased.
String[] array = emptyElementArr[firstChar];
if (array != null) {
for (int i = array.length - 1; i >= 0; i--) {
if (name.equalsIgnoreCase(array[i]))
return true;
}
}
return false;
}
static private char _LAST_EMPTY_ELEMENT_START = 'p';
static private String[][] emptyElementArr =
new String[((int) _LAST_EMPTY_ELEMENT_START) + 1][];
static private String[] aNames = new String[]{
"area",
};
static private String[] bNames = new String[]{
"br",
"base",
"basefont",
};
static private String[] cNames = new String[]{
"col",
};
static private String[] fNames = new String[]{
"frame",
};
static private String[] hNames = new String[]{
"hr",
};
static private String[] iNames = new String[]{
"img",
"input",
"isindex",
};
static private String[] lNames = new String[]{
"link",
};
static private String[] mNames = new String[]{
"meta",
};
static private String[] pNames = new String[]{
"param",
};
static {
emptyElementArr['a'] = aNames;
emptyElementArr['A'] = aNames;
emptyElementArr['b'] = bNames;
emptyElementArr['B'] = bNames;
emptyElementArr['c'] = cNames;
emptyElementArr['C'] = cNames;
emptyElementArr['f'] = fNames;
emptyElementArr['F'] = fNames;
emptyElementArr['h'] = hNames;
emptyElementArr['H'] = hNames;
emptyElementArr['i'] = iNames;
emptyElementArr['I'] = iNames;
emptyElementArr['l'] = lNames;
emptyElementArr['L'] = lNames;
emptyElementArr['m'] = mNames;
emptyElementArr['M'] = mNames;
emptyElementArr['p'] = pNames;
emptyElementArr['P'] = pNames;
}
// ----------------------------------------------------------- Inner Classes
/**
* Private implementation of ByteArrayOutputStream.
*/
private static class MyByteArrayOutputStream extends ByteArrayOutputStream {
public MyByteArrayOutputStream(int initialCapacity) {
super(initialCapacity);
}
/**
* Obtain access to the underlying byte array to prevent
* unecessary temp object creation.
* @return buf
*/
public byte[] getBuf() {
return buf;
}
}
}