All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.myfaces.trinidadinternal.io.HTMLEscapes Maven / Gradle / Ivy

There is a newer version: 2.2.1
Show newest version
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 * 
 *  http://www.apache.org/licenses/LICENSE-2.0
 * 
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.apache.myfaces.trinidadinternal.io;

import java.io.IOException;
import java.io.Writer;

/**
 * Utility class for escaping HTML text.
 * 

*/ public class HTMLEscapes { // =-=AEW Performance - look at whether text and attributes // should be stored as character arrays or strings (might be // different decision for each), and make this class conform. /** * Write char array text. Note that this code is duplicated below * for Strings - change both places if you make any changes!!! */ static public void writeText( Writer out, char[] buff, char[] text, int start, int length ) throws IOException { _writeText(out, XMLEscapes.__BODY_ENTITIES, buff, text, start, length, true); } /** * Write String text. */ static public void writeText( Writer out, char[] buff, String text ) throws IOException { _writeText(out, XMLEscapes.__BODY_ENTITIES, buff, text, true); } /** * Write a string attribute */ static public void writeAttribute( Writer out, char[] buff, String attributeValue ) throws IOException { _writeText(out, _ATTRIBUTE_ENTITIES, buff, attributeValue, false); } /** * Write char array text. Note that this code is duplicated below * for Strings - change both places if you make any changes!!! */ static private void _writeText( final Writer out, final String[] entities, final char[] buff, final char[] text, final int start, final int length, final boolean isBodyText ) throws IOException { int buffIndex = 0; final int end = start + length; for (int i = start; i < end; i++) { final char ch = text[i]; if (ch < 0xA0) { // text is in the US7ASCII range if (ch >= 0x3f) { // US7ASCII text at "?" and above never needs to be escaped buffIndex = _addToBuffer(out, buff, buffIndex, ch); } else { // speed up handling of common characters like the space character by splitting the // range with possible entities into two parts--the high part where a null entity means // write the chacter through straight and the low part where a null entity means that // we have a line feed or carriage return, which require special handling if (ch > 0xD) { // high part of entities. Null entry means write ch through to buffer String entity = entities[ch]; if (entity == null) { buffIndex = _addToBuffer(out, buff, buffIndex, ch); } else { // special case handling of & in attributes to support macros in future versions // of HTML if (ch == '&' && !isBodyText && (i + 1 < length) && (text[i + 1] == '{')) buffIndex = _addToBuffer(out, buff, buffIndex, ch); else { // not weird & attribute case, so write the entity buffIndex = _addToBuffer(out, buff, buffIndex, entity); } } } else { // low part of entities. Null entry means we have a carriage return or line feed String entity = entities[ch]; if (entity != null) { buffIndex = _addToBuffer(out, buff, buffIndex, entity); } else { buffIndex = _flushBuffer(out, buff, buffIndex); // handle carriage return/line feed // write out a newline _println(out); // collapse combinations of carriage return/line feed or line feed/carriage return // together char checkChar = (char)((ch == 0xD) ? 0xA : 0xD); if ((i + 1 < length) && (text[i + 1] == checkChar)) i++; } } } } else if (ch <= 0xff) { // character is in the high ISO range, so use HTML entity buffIndex = _addToBuffer(out, buff, buffIndex, _sISO8859_1_Entities[ch - 0xA0]); } else if (ch < 0xfffe) // characters fffe and ffff are considered outside of unicode { // character is outside of the ISO range if (isBodyText) { // See above for what _UNICODE_LINE_BREAK means... if (ch == _UNICODE_LINE_BREAK) buffIndex = _addToBuffer(out, buff, buffIndex, "
"); else if (ch == _UNICODE_HYPHENATION_POINT) buffIndex = _addToBuffer(out, buff, buffIndex, ""); else buffIndex = _writeDecRef(out, buff, buffIndex, ch); } else { buffIndex = _writeDecRef(out, buff, buffIndex, ch); } } } // flush the buffer, since the caller doesn't try to maintain the buffer index betweeen calls _flushBuffer(out, buff, buffIndex); } /** * Write String text. Note that this code is duplicated above for * character arrays - change both places if you make any changes!!! */ static private void _writeText( final Writer out, final String[] entities, final char[] buff, final String text, final boolean isBodyText ) throws IOException { int buffIndex = 0; final int length = text.length(); for (int i = 0; i < length; i++) { final char ch = text.charAt(i); if (ch < 0xA0) { // text is in the US7ASCII range if (ch >= 0x3f) { // US7ASCII text at "?" and above never needs to be escaped buffIndex = _addToBuffer(out, buff, buffIndex, ch); } else { // speed up handling of common characters like the space character by splitting the // range with possible entities into two parts--the high part where a null entity means // write the character through straight and the low part where a null entity means that // we have a line feed or carriage return, which require special handling if (ch > 0xD) { // high part of entities. Null entry means write ch through to buffer String entity = entities[ch]; if (entity == null) { buffIndex = _addToBuffer(out, buff, buffIndex, ch); } else { // special case handling of & in attributes to support macros in future versions // of HTML if (ch == '&' && !isBodyText && (i + 1 < length) && (text.charAt(i + 1) == '{')) buffIndex = _addToBuffer(out, buff, buffIndex, ch); else { // not weird & attribute case, so write the entity buffIndex = _addToBuffer(out, buff, buffIndex, entity); } } } else { // low part of entities. Null entry means we have a carriage return or line feed String entity = entities[ch]; if (entity != null) { buffIndex = _addToBuffer(out, buff, buffIndex, entity); } else { buffIndex = _flushBuffer(out, buff, buffIndex); // handle carriage return/line feed // write out a newline _println(out); // collapse combinations of carriage return/line feed or line feed/carriage return // together char checkChar = (char)((ch == 0xD) ? 0xA : 0xD); if ((i + 1 < length) && (text.charAt(i + 1) == checkChar)) i++; } } } } else if (ch <= 0xff) { // character is in the high ISO range, so use HTML entity buffIndex = _addToBuffer(out, buff, buffIndex, _sISO8859_1_Entities[ch - 0xA0]); } else if (ch < 0xfffe) // characters fffe and ffff are considered outside of unicode { // character is outside of the ISO range if (isBodyText) { // See above for what _UNICODE_LINE_BREAK means... if (ch == _UNICODE_LINE_BREAK) buffIndex = _addToBuffer(out, buff, buffIndex, "
"); else if (ch == _UNICODE_HYPHENATION_POINT) buffIndex = _addToBuffer(out, buff, buffIndex, ""); else buffIndex = _writeDecRef(out, buff, buffIndex, ch); } else { buffIndex = _writeDecRef(out, buff, buffIndex, ch); } } } // flush the buffer, since the caller doesn't try to maintain the buffer index betweeen calls _flushBuffer(out, buff, buffIndex); } /** * Writes the output as a decimal escape. This is the same size or smaller than the hex * equivalent and works on versions of Netscape before 4.74. See bug #1491321. *

*/ static private int _writeDecRef( final Writer out, final char[] buff, int buffIndex, final char ch ) throws IOException { // Formerly used String.valueOf(). This version tests out // about 40% faster (and on systems where GC is going gonzo, // it should be massively better) // two branches, one using the buffer and one not if (buffIndex + 8 > buff.length) { // not enough room for biggest possible numeric character entry, // so flush buffer before we write to the output stream directly buffIndex = _flushBuffer(out, buff, buffIndex); // use XML escaping code XMLEscapes.__writeDecRef(out, ch); } else { int i = ch; // we have enough space for the biggest string, so use buffer buff[buffIndex++] = '&'; buff[buffIndex++] = '#'; if (i > 10000) { buff[buffIndex++] = (char)('0' + (i / 10000)); i = i % 10000; buff[buffIndex++] = (char)('0' + (i / 1000)); i = i % 1000; buff[buffIndex++] = (char)('0' + (i / 100)); i = i % 100; buff[buffIndex++] = (char)('0' + (i / 10)); i = i % 10; buff[buffIndex++] = (char)('0' + i); } else if (i > 1000) { buff[buffIndex++] = (char)('0' + (i / 1000)); i = i % 1000; buff[buffIndex++] = (char)('0' + (i / 100)); i = i % 100; buff[buffIndex++] = (char)('0' + (i / 10)); i = i % 10; buff[buffIndex++] = (char)('0' + i); } else { buff[buffIndex++] = (char)('0' + (i / 100)); i = i % 100; buff[buffIndex++] = (char)('0' + (i / 10)); i = i % 10; buff[buffIndex++] = (char)('0' + i); } buff[buffIndex++] = ';'; } return buffIndex; } /** * Add a character to the buffer, flushing the buffer if the buffer is full, * and returning the new buffer index */ private static int _addToBuffer( Writer out, char[] buffer, int bufferIndex, char ch ) throws IOException { if (bufferIndex >= buffer.length) { out.write(buffer, 0, bufferIndex); bufferIndex = 0; } buffer[bufferIndex] = ch; return bufferIndex + 1; } /** * Add a String to the buffer, flushing the buffer if the buffer is full, * and returning the new buffer index */ private static int _addToBuffer( Writer out, char[] buffer, int bufferIndex, String outString ) throws IOException { int writeSize = outString.length(); int outSize = bufferIndex + writeSize; if (outSize >= buffer.length) { // flush the buffer out.write(buffer, 0, bufferIndex); bufferIndex = 0; // we will be the first chars in the buffer outSize = writeSize; } // copy into the buffer outString.getChars(0, writeSize, buffer, bufferIndex); return outSize; } /** * Flush the contents of the buffer to the output stream * and return the reset buffer index */ private static int _flushBuffer( Writer out, char[] buffer, int bufferIndex ) throws IOException { if (bufferIndex > 0) { out.write(buffer, 0, bufferIndex); } return 0; } private static void _println(Writer out) throws IOException { out.write('\n'); } private HTMLEscapes() { } // array of entities that need to be output in attributes for code points 0 - 62 private static final String[] _ATTRIBUTE_ENTITIES; static { // initialize the entities that need to be escaped for attributes // we also need to escape the quote in attributes, but not the less-than _ATTRIBUTE_ENTITIES = XMLEscapes.__BASE_ENTITIES.clone(); _ATTRIBUTE_ENTITIES['"'] = """; } // // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF // If content size is important, all entities longer than four characters should be replaced // by their numeric equivalents // static private String[] _sISO8859_1_Entities = new String[] { " ", "¡", "¢", "£", "¤", "¥", "¦", "§", "¨", "©", "ª", "«", "¬", "­", "®", "¯", "°", "±", "²", "³", "´", "µ", "¶", "·", "¸", "¹", "º", "»", "¼", "½", "¾", "¿", "À", "Á", "Â", "Ã", "Ä", "Å", "Æ", "Ç", "È", "É", "Ê", "Ë", "Ì", "Í", "Î", "Ï", "Ð", "Ñ", "Ò", "Ó", "Ô", "Õ", "Ö", "×", "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "Þ", "ß", "à", "á", "â", "ã", "ä", "å", "æ", "ç", "è", "é", "ê", "ë", "ì", "í", "î", "ï", "ð", "ñ", "ò", "ó", "ô", "õ", "ö", "÷", "ø", "ù", "ú", "û", "ü", "ý", "þ", "ÿ" }; // =-=AEW Need entities from 24.3.1 and 24.4.1 // Constant for the Unicode line break character static private final char _UNICODE_LINE_BREAK = 0x2028; // Constant for the Unicode hyphenation point character. // UIFunctions.hyphenate(..) uses this character to indicate where to insert // the tag. This tag inserts a no-width-space so that the browser may // break the line at that point. static private final char _UNICODE_HYPHENATION_POINT = 0x2027; }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy