// org.openbp.common.markup.HTMLEscapeHelper (from the openbp-common artifact)
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openbp.common.markup;
import java.util.HashMap;
import java.util.Map;
/**
 * HTML-related utilities, i.e. to convert regular text to HTML text.
 *
 * The class is a static helper and cannot be instantiated.
 */
public final class HTMLEscapeHelper
{
    //////////////////////////////////////////////////
    // @@ Special character data
    //////////////////////////////////////////////////

    /**
     * Table of special characters like umlaut characters that will be
     * converted to their respective HTML representation.
     * Each row holds the entity name (without the leading '&amp;' and the
     * trailing ';') and the character code it stands for.
     * Based on http://hotwired.lycos.com/webmonkey/reference/special_characters
     */
    static Object [][] entities = {
        // apostrophe
        { "#39", new Integer(39) },
        // slash
        { "#47", new Integer('/') },
        // backslash
        { "#92", new Integer('\\') },
        // double-quote
        { "quot", new Integer(34) },
        // ampersand
        { "amp", new Integer(38) },
        // less-than
        { "lt", new Integer(60) },
        // greater-than
        { "gt", new Integer(62) },
        // non-breaking space
        { "nbsp", new Integer(160) },
        // copyright
        { "copy", new Integer(169) },
        // registered trademark
        { "reg", new Integer(174) },
        // uppercase A, grave accent
        { "Agrave", new Integer(192) },
        // uppercase A, acute accent
        { "Aacute", new Integer(193) },
        // uppercase A, circumflex accent
        { "Acirc", new Integer(194) },
        // uppercase A, tilde
        { "Atilde", new Integer(195) },
        // uppercase A, umlaut
        { "Auml", new Integer(196) },
        // uppercase A, ring
        { "Aring", new Integer(197) },
        // uppercase AE
        { "AElig", new Integer(198) },
        // uppercase C, cedilla
        { "Ccedil", new Integer(199) },
        // uppercase E, grave accent
        { "Egrave", new Integer(200) },
        // uppercase E, acute accent
        { "Eacute", new Integer(201) },
        // uppercase E, circumflex accent
        { "Ecirc", new Integer(202) },
        // uppercase E, umlaut
        { "Euml", new Integer(203) },
        // uppercase I, grave accent
        { "Igrave", new Integer(204) },
        // uppercase I, acute accent
        { "Iacute", new Integer(205) },
        // uppercase I, circumflex accent
        { "Icirc", new Integer(206) },
        // uppercase I, umlaut
        { "Iuml", new Integer(207) },
        // uppercase Eth, Icelandic
        { "ETH", new Integer(208) },
        // uppercase N, tilde
        { "Ntilde", new Integer(209) },
        // uppercase O, grave accent
        { "Ograve", new Integer(210) },
        // uppercase O, acute accent
        { "Oacute", new Integer(211) },
        // uppercase O, circumflex accent
        { "Ocirc", new Integer(212) },
        // uppercase O, tilde
        { "Otilde", new Integer(213) },
        // uppercase O, umlaut
        { "Ouml", new Integer(214) },
        // uppercase O, slash
        { "Oslash", new Integer(216) },
        // uppercase U, grave accent
        { "Ugrave", new Integer(217) },
        // uppercase U, acute accent
        { "Uacute", new Integer(218) },
        // uppercase U, circumflex accent
        { "Ucirc", new Integer(219) },
        // uppercase U, umlaut
        { "Uuml", new Integer(220) },
        // uppercase Y, acute accent
        { "Yacute", new Integer(221) },
        // uppercase THORN, Icelandic
        { "THORN", new Integer(222) },
        // lowercase sharp s, German
        { "szlig", new Integer(223) },
        // lowercase a, grave accent
        { "agrave", new Integer(224) },
        // lowercase a, acute accent
        { "aacute", new Integer(225) },
        // lowercase a, circumflex accent
        { "acirc", new Integer(226) },
        // lowercase a, tilde
        { "atilde", new Integer(227) },
        // lowercase a, umlaut
        { "auml", new Integer(228) },
        // lowercase a, ring
        { "aring", new Integer(229) },
        // lowercase ae
        { "aelig", new Integer(230) },
        // lowercase c, cedilla
        { "ccedil", new Integer(231) },
        // lowercase e, grave accent
        { "egrave", new Integer(232) },
        // lowercase e, acute accent
        { "eacute", new Integer(233) },
        // lowercase e, circumflex accent
        { "ecirc", new Integer(234) },
        // lowercase e, umlaut
        { "euml", new Integer(235) },
        // lowercase i, grave accent
        { "igrave", new Integer(236) },
        // lowercase i, acute accent
        { "iacute", new Integer(237) },
        // lowercase i, circumflex accent
        { "icirc", new Integer(238) },
        // lowercase i, umlaut
        { "iuml", new Integer(239) },
        // lowercase eth, Icelandic
        { "eth", new Integer(240) },
        // lowercase n, tilde
        { "ntilde", new Integer(241) },
        // lowercase o, grave accent
        { "ograve", new Integer(242) },
        // lowercase o, acute accent
        { "oacute", new Integer(243) },
        // lowercase o, circumflex accent
        { "ocirc", new Integer(244) },
        // lowercase o, tilde
        { "otilde", new Integer(245) },
        // lowercase o, umlaut
        { "ouml", new Integer(246) },
        // lowercase o, slash
        { "oslash", new Integer(248) },
        // lowercase u, grave accent
        { "ugrave", new Integer(249) },
        // lowercase u, acute accent
        { "uacute", new Integer(250) },
        // lowercase u, circumflex accent
        { "ucirc", new Integer(251) },
        // lowercase u, umlaut
        { "uuml", new Integer(252) },
        // lowercase y, acute accent
        { "yacute", new Integer(253) },
        // lowercase thorn, Icelandic
        { "thorn", new Integer(254) },
        // lowercase y, umlaut
        { "yuml", new Integer(255) },
        // Euro symbol
        { "euro", new Integer(8364) },
    };

    /**
     * Lookup table of special characters.
     * Maps the character code (Integer) to the entity name (String).
     */
    private static final Map i2e = new HashMap();

    static
    {
        for (int i = 0; i < entities.length; ++i)
        {
            i2e.put(entities [i] [1], entities [i] [0]);
        }
    }

    /**
     * Private constructor prevents instantiation.
     */
    private HTMLEscapeHelper()
    {
    }

    //////////////////////////////////////////////////
    // @@ Escaped character data
    //////////////////////////////////////////////////

    /** First UTF-16 code unit of the high (leading) surrogate range. */
    private static final int HIGH_SURROGATE_MIN = 0xD800;

    /** Last UTF-16 code unit of the high (leading) surrogate range. */
    private static final int HIGH_SURROGATE_MAX = 0xDBFF;

    /** First UTF-16 code unit of the low (trailing) surrogate range. */
    private static final int LOW_SURROGATE_MIN = 0xDC00;

    /** Last UTF-16 code unit of the low (trailing) surrogate range. */
    private static final int LOW_SURROGATE_MAX = 0xDFFF;

    /** Unicode replacement character, emitted for unpaired surrogates. */
    private static final int REPLACEMENT_CHAR = 0xFFFD;

    /**
     * Table of supported escaped characters like "\n" that will be
     * converted to their HTML equivalents:
     * a newline becomes an HTML line break, a tab becomes 4 non-breaking spaces.
     */
    private static final String [][] maskedEntities = {
        { "\n", "<br>" },
        { "\t", "&nbsp;&nbsp;&nbsp;&nbsp;" },
    };

    /**
     * Lookup table of escaped characters.
     * Maps the one-character string to its complete HTML replacement text.
     */
    private static final Map masked2html = new HashMap();

    static
    {
        for (int i = 0; i < maskedEntities.length; ++i)
        {
            masked2html.put(maskedEntities [i] [0], maskedEntities [i] [1]);
        }
    }

    /**
     * Turns special characters into HTML entity equivalents,
     * e.g. <code>"bread" &amp; "butter"</code>
     * becomes <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
     *
     * Characters listed in the entity table are written as named (or fixed numeric)
     * entities. Any other character outside the 7 bit ASCII range is written as a
     * decimal numeric character reference (<code>&amp;#NNN;</code>). A surrogate pair
     * is written as a single reference to the character it encodes; an unpaired
     * surrogate is written as a reference to the Unicode replacement character,
     * because references to surrogate code points are not valid HTML.
     *
     * In addition, "\n" characters will be converted to HTML line breaks and "\t" to
     * 4 non-breaking spaces.
     *
     * @param s String to escape (may be null)
     * @return Escaped string; the empty string if the argument is null
     */
    public static String htmlescape(String s)
    {
        if (s == null)
            return "";

        int n = s.length();
        StringBuffer buf = new StringBuffer(n + 16);

        for (int i = 0; i < n; ++i)
        {
            char ch = s.charAt(i);

            // Characters of the entity table take precedence
            String entity = (String) i2e.get(new Integer(ch));
            if (entity != null)
            {
                buf.append('&').append(entity).append(';');
                continue;
            }

            // Control characters that have an HTML layout equivalent
            String htmlEntity = (String) masked2html.get(String.valueOf(ch));
            if (htmlEntity != null)
            {
                buf.append(htmlEntity);
                continue;
            }

            // Plain 7 bit ASCII passes through unchanged
            if (ch < 128)
            {
                buf.append(ch);
                continue;
            }

            int codePoint = ch;
            if (ch >= HIGH_SURROGATE_MIN && ch <= LOW_SURROGATE_MAX)
            {
                // A surrogate on its own does not denote a character
                codePoint = REPLACEMENT_CHAR;

                if (ch <= HIGH_SURROGATE_MAX && i + 1 < n)
                {
                    char low = s.charAt(i + 1);
                    if (low >= LOW_SURROGATE_MIN && low <= LOW_SURROGATE_MAX)
                    {
                        // Combine the pair to the supplementary code point and skip the low surrogate
                        codePoint = 0x10000 + ((ch - HIGH_SURROGATE_MIN) << 10) + (low - LOW_SURROGATE_MIN);
                        ++i;
                    }
                }
            }

            buf.append("&#").append(codePoint).append(';');
        }

        return buf.toString();
    }
}