org.apache.myfaces.trinidadinternal.io.HTMLEscapes Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of trinidad-impl Show documentation
Private implementation of the Apache MyFaces Trinidad project
The newest version!
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 * 
 *  http://www.apache.org/licenses/LICENSE-2.0
 * 
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.apache.myfaces.trinidadinternal.io;

import java.io.IOException;
import java.io.Writer;

/**
 * Utility class for escaping HTML text.
 * 
 */
public class HTMLEscapes
{
  // =-=AEW Performance - look at whether text and attributes
  // should be stored as character arrays or strings (might be
  // different decision for each), and make this class conform.

  /**
   * Write char array text.  Note that this code is duplicated below
   * for Strings - change both places if you make any changes!!!
   */
  static public void writeText(
    Writer out,
    char[]      buff,
    char[]      text,
    int         start,
    int         length
    ) throws IOException
  {
    _writeText(out, XMLEscapes.__BODY_ENTITIES, buff, text, start, length, true);
  }
  
  /**
   * Write String text.
   */
  static public void writeText(
    Writer out,
    char[] buff,
    String text
    ) throws IOException
  {
    _writeText(out, XMLEscapes.__BODY_ENTITIES, buff, text, true);
  }

  /**
   * Write a string attribute
   */
  static public void writeAttribute(
    Writer out,
    char[] buff,
    String attributeValue
    )
    throws IOException
  {
    _writeText(out, _ATTRIBUTE_ENTITIES, buff, attributeValue, false);
  }

  /**
   * Write char array text.  Note that this code is duplicated below
   * for Strings - change both places if you make any changes!!!
   */
  static private void _writeText(
    final Writer      out,
    final String[]    entities,
    final char[]      buff,
    final char[]      text,
    final int         start,
    final int         length,
    final boolean     isBodyText
    ) throws IOException
  {
    int buffIndex = 0;

    final int end = start + length;

    for (int i = start; i < end; i++)
    {
      final char ch = text[i];

      if (ch < 0xA0)
      {
        // text is in the US7ASCII range        
        if (ch >= 0x3f)
        {
          // US7ASCII text at "?" and above never needs to be escaped
          buffIndex = _addToBuffer(out, buff, buffIndex, ch);
        }
        else
        {
          // speed up handling of common characters like the space character by splitting the
          // range with possible entities into two parts--the high part where a null entity means
          // write the chacter through straight and the low part where a null entity means that
          // we have a line feed or carriage return, which require special handling
          if (ch > 0xD)
          {
            // high part of entities. Null entry means write ch through to buffer
            String entity = entities[ch];
            
            if (entity == null)
            {
              buffIndex = _addToBuffer(out, buff, buffIndex, ch);
            }
            else
            {
              // special case handling of & in attributes to support macros in future versions
              // of HTML
              if (ch == '&' && !isBodyText && (i + 1 < length) && (text[i + 1] == '{'))
                buffIndex = _addToBuffer(out, buff, buffIndex, ch);
              else
              {
                // not weird & attribute case, so write the entity
                buffIndex = _addToBuffer(out, buff, buffIndex, entity);
              }
            }
          }
          else
          {            
            // low part of entities. Null entry means we have a carriage return or line feed
            String entity = entities[ch];
            
            if (entity != null)
            {
              buffIndex = _addToBuffer(out, buff, buffIndex, entity);
            }
            else
            {
              buffIndex = _flushBuffer(out, buff, buffIndex);
              // handle carriage return/line feed
              
              // write out a newline
              _println(out);
              
              // collapse combinations of carriage return/line feed or line feed/carriage return
              // together
              char checkChar = (char)((ch == 0xD) ? 0xA : 0xD);
  
              if ((i + 1 < length) && (text[i + 1] == checkChar))
                i++;
            }            
          }
        }
      }
      else if (ch <= 0xff)
      {
        // character is in the high ISO range, so use HTML entity
        buffIndex = _addToBuffer(out, buff, buffIndex, _sISO8859_1_Entities[ch - 0xA0]);
      }
      else if (ch < 0xfffe) // characters fffe and ffff are considered outside of unicode
      {
        // character is outside of the ISO range
        if (isBodyText)
        {
          // See above for what _UNICODE_LINE_BREAK means...
          if (ch == _UNICODE_LINE_BREAK)
            buffIndex = _addToBuffer(out, buff, buffIndex, "
");
          else if (ch == _UNICODE_HYPHENATION_POINT)
            buffIndex = _addToBuffer(out, buff, buffIndex, "");
          else
            buffIndex = _writeDecRef(out, buff, buffIndex, ch);
        }
        else
        {
          buffIndex = _writeDecRef(out, buff, buffIndex, ch);         
        }
      }
    }

    // flush the buffer, since the caller doesn't try to maintain the buffer index betweeen calls
    _flushBuffer(out, buff, buffIndex);
  }
  
  /**
   * Write String text.  Note that this code is duplicated above for
   * character arrays - change both places if you make any changes!!!
   */
  static private void _writeText(
    final Writer      out,
    final String[]    entities,
    final char[]      buff,
    final String      text,
    final boolean     isBodyText
    ) throws IOException
  {
    int buffIndex = 0;

    final int length = text.length();

    for (int i = 0; i < length; i++)
    {
      final char ch = text.charAt(i);

      if (ch < 0xA0)
      {
        // text is in the US7ASCII range        
        if (ch >= 0x3f)
        {
          // US7ASCII text at "?" and above never needs to be escaped
          buffIndex = _addToBuffer(out, buff, buffIndex, ch);
        }
        else
        {
          // speed up handling of common characters like the space character by splitting the
          // range with possible entities into two parts--the high part where a null entity means
          // write the character through straight and the low part where a null entity means that
          // we have a line feed or carriage return, which require special handling
          if (ch > 0xD)
          {
            // high part of entities. Null entry means write ch through to buffer
            String entity = entities[ch];
            
            if (entity == null)
            {
              buffIndex = _addToBuffer(out, buff, buffIndex, ch);
            }
            else
            {
              // special case handling of & in attributes to support macros in future versions
              // of HTML
              if (ch == '&' && !isBodyText && (i + 1 < length) && (text.charAt(i + 1) == '{'))
                buffIndex = _addToBuffer(out, buff, buffIndex, ch);
              else
              {
                // not weird & attribute case, so write the entity
                buffIndex = _addToBuffer(out, buff, buffIndex, entity);
              }
            }
          }
          else
          {            
            // low part of entities. Null entry means we have a carriage return or line feed
            String entity = entities[ch];
            
            if (entity != null)
            {
              buffIndex = _addToBuffer(out, buff, buffIndex, entity);
            }
            else
            {
              buffIndex = _flushBuffer(out, buff, buffIndex);
              
              // handle carriage return/line feed
              
              // write out a newline
              _println(out);
              
              // collapse combinations of carriage return/line feed or line feed/carriage return
              // together
              char checkChar = (char)((ch == 0xD) ? 0xA : 0xD);
  
              if ((i + 1 < length) && (text.charAt(i + 1) == checkChar))
                i++;
            }            
          }
        }
      }
      else if (ch <= 0xff)
      {
        // character is in the high ISO range, so use HTML entity
        buffIndex = _addToBuffer(out, buff, buffIndex, _sISO8859_1_Entities[ch - 0xA0]);
      }
      else if (ch < 0xfffe) // characters fffe and ffff are considered outside of unicode
      {
        // character is outside of the ISO range
        if (isBodyText)
        {
          // See above for what _UNICODE_LINE_BREAK means...
          if (ch == _UNICODE_LINE_BREAK)
            buffIndex = _addToBuffer(out, buff, buffIndex, "
");
          else if (ch == _UNICODE_HYPHENATION_POINT)
            buffIndex = _addToBuffer(out, buff, buffIndex, "");
          else
            buffIndex = _writeDecRef(out, buff, buffIndex, ch);
        }
        else
        {
          buffIndex = _writeDecRef(out, buff, buffIndex, ch);         
        }
      }
    }

    // flush the buffer, since the caller doesn't try to maintain the buffer index betweeen calls
    _flushBuffer(out, buff, buffIndex);
  }

  /**
   * Writes the output as a decimal escape.  This is the same size or smaller than the hex
   * equivalent and works on versions of Netscape before 4.74. See bug #1491321.
   * 
   */
  static private int _writeDecRef(
    final Writer out,
    final char[] buff,
    int          buffIndex,
    final char   ch
    ) throws IOException
  {
    // Formerly used String.valueOf().  This version tests out
    // about 40% faster (and on systems where GC is going gonzo,
    // it should be massively better)

    // two branches, one using the buffer and one not
    if (buffIndex + 8 > buff.length)
    {
      // not enough room for biggest possible numeric character entry,
      // so flush buffer before we write to the output stream directly
      buffIndex = _flushBuffer(out, buff, buffIndex);
      
      // use XML escaping code
      XMLEscapes.__writeDecRef(out, ch);     
    }
    else
    {
      int i = ch;

      // we have enough space for the biggest string, so use buffer
      buff[buffIndex++] = '&';
      buff[buffIndex++] = '#';
 
      if (i > 10000)
      {      
        buff[buffIndex++] = (char)('0' + (i / 10000));
        i = i % 10000;
        buff[buffIndex++] = (char)('0' + (i / 1000));
        i = i % 1000;
        buff[buffIndex++] = (char)('0' + (i / 100));
        i = i % 100;
        buff[buffIndex++] = (char)('0' + (i / 10));
        i = i % 10;
        buff[buffIndex++] = (char)('0' + i);
      }
      else if (i > 1000)
      {
        buff[buffIndex++] = (char)('0' + (i / 1000));
        i = i % 1000;
        buff[buffIndex++] = (char)('0' + (i / 100));
        i = i % 100;
        buff[buffIndex++] = (char)('0' + (i / 10));
        i = i % 10;
        buff[buffIndex++] = (char)('0' + i);
      }
      else
      {
        buff[buffIndex++] = (char)('0' + (i / 100));
        i = i % 100;
        buff[buffIndex++] = (char)('0' + (i / 10));
        i = i % 10;
        buff[buffIndex++] = (char)('0' + i);
      }

      buff[buffIndex++] = ';';
    }
    
    return buffIndex;
  }  

  /**
   * Add a character to the buffer, flushing the buffer if the buffer is full,
   * and returning the new buffer index
   */
  private static int _addToBuffer(
    Writer out,
    char[] buffer,
    int    bufferIndex,
    char   ch
    ) throws IOException
  {
    if (bufferIndex >= buffer.length)
    {
      out.write(buffer, 0, bufferIndex);
      bufferIndex = 0;
    }

    buffer[bufferIndex] = ch;

    return bufferIndex + 1;
  }

  /**
   * Add a String to the buffer, flushing the buffer if the buffer is full,
   * and returning the new buffer index
   */
  private static int _addToBuffer(
    Writer out,
    char[] buffer,
    int    bufferIndex,
    String outString
    ) throws IOException
  {
    int writeSize = outString.length();
    int outSize = bufferIndex + writeSize;
    
    if (outSize >= buffer.length)
    {
      // flush the buffer
      out.write(buffer, 0, bufferIndex);
      bufferIndex = 0;
      
      // we will be the first chars in the buffer
      outSize = writeSize;
    }

    // copy into the buffer
    outString.getChars(0, writeSize, buffer, bufferIndex);
    
    return outSize;
  }

  /**
   * Flush the contents of the buffer to the output stream
   * and return the reset buffer index
   */
  private static int _flushBuffer(
    Writer out,
    char[]  buffer,
    int     bufferIndex
    ) throws IOException
  {
    if (bufferIndex > 0)
    {
      out.write(buffer, 0, bufferIndex);
    }

    return 0;
  }

  private static void _println(Writer out) throws IOException
  {
    out.write('\n');
  }
  

  private HTMLEscapes()
  {
  }

  // array of entities that need to be output in attributes for code points 0 - 62
  private static final String[] _ATTRIBUTE_ENTITIES;

  static
  {
    // initialize the entities that need to be escaped for attributes
    
    // we also need to escape the quote in attributes, but not the less-than
    _ATTRIBUTE_ENTITIES = XMLEscapes.__BASE_ENTITIES.clone();
    _ATTRIBUTE_ENTITIES['"'] = """;
  }
  
  //
  // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF
  // If content size is important, all entities longer than four characters should be replaced
  // by their numeric equivalents
  //
  static private String[] _sISO8859_1_Entities = new String[]
  {
    " ",
    "¡",
    "¢",
    "£",
    "¤",
    "¥",
    "¦",
    "§",
    "¨",
    "©",
    "ª",
    "«",
    "¬",
    "",
    "®",
    "¯",
    "°",
    "±",
    "²",
    "³",
    "´",
    "µ",
    "¶",
    "·",
    "¸",
    "¹",
    "º",
    "»",
    "¼",
    "½",
    "¾",
    "¿",
    "À",
    "Á",
    "Â",
    "Ã",
    "Ä",
    "Å",
    "Æ",
    "Ç",
    "È",
    "É",
    "Ê",
    "Ë",
    "Ì",
    "Í",
    "Î",
    "Ï",
    "Ð",
    "Ñ",
    "Ò",
    "Ó",
    "Ô",
    "Õ",
    "Ö",
    "×",
    "Ø",
    "Ù",
    "Ú",
    "Û",
    "Ü",
    "Ý",
    "Þ",
    "ß",
    "à",
    "á",
    "â",
    "ã",
    "ä",
    "å",
    "æ",
    "ç",
    "è",
    "é",
    "ê",
    "ë",
    "ì",
    "í",
    "î",
    "ï",
    "ð",
    "ñ",
    "ò",
    "ó",
    "ô",
    "õ",
    "ö",
    "÷",
    "ø",
    "ù",
    "ú",
    "û",
    "ü",
    "ý",
    "þ",
    "ÿ"
  };

  // =-=AEW Need entities from 24.3.1 and 24.4.1

  // Constant for the Unicode line break character
  static private final char _UNICODE_LINE_BREAK = 0x2028;
  
  // Constant for the Unicode hyphenation point character.
  // UIFunctions.hyphenate(..) uses this character to indicate where to insert
  // the  tag. This tag inserts a no-width-space so that the browser may
  // break the line at that point.
  static private final char _UNICODE_HYPHENATION_POINT = 0x2027;
}