org.gatein.common.text.EntityEncoder Maven / Gradle / Ivy
The newest version!
/******************************************************************************
* JBoss, a division of Red Hat *
* Copyright 2009, Red Hat Middleware, LLC, and individual *
* contributors as indicated by the @authors tag. See the *
* copyright.txt in the distribution for a full listing of *
* individual contributors. *
* *
* This is free software; you can redistribute it and/or modify it *
* under the terms of the GNU Lesser General Public License as *
* published by the Free Software Foundation; either version 2.1 of *
* the License, or (at your option) any later version. *
* *
* This software is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
* Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public *
* License along with this software; if not, write to the Free *
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA *
* 02110-1301 USA, or see the FSF site: http://www.fsf.org. *
******************************************************************************/
package org.gatein.common.text;
import org.gatein.common.util.ParameterValidation;
import java.util.HashMap;
import java.util.Map;
/**
* This encoder performs lookup for converting a char to its HTML entity representation.
*
* @author Julien Viet
* @version $Revision: 630 $
*/
public class EntityEncoder extends AbstractCharEncoder
{
/**
 * Writes a single char to the writer, as {@code &name;} when an entity is registered for it and
 * unchanged otherwise.
 *
 * @param c the char to encode
 * @param writer the destination, must not be null
 * @throws EncodingException declared by the signature; not thrown directly by this method
 */
public void encode(char c, CharWriter writer) throws EncodingException
{
   ParameterValidation.throwIllegalArgExceptionIfNull(writer, "CharWriter");

   final String entity = lookup(c);
   if (entity == null)
   {
      // No entity registered for this char: pass it through untouched
      writer.append(c);
      return;
   }

   // Emit the entity reference piece by piece
   writer.append('&');
   writer.append(entity);
   writer.append(';');
}
/**
 * Encodes the {@code len} chars of {@code chars} starting at {@code off}: every char that has a
 * registered entity is written as {@code &name;}, all other chars are copied through in runs.
 *
 * <p>This method performs no validation of its arguments.
 * NOTE(review): the "safe" prefix suggests callers validate the range and the writer beforehand;
 * confirm against the parent class.</p>
 *
 * @param chars the array holding the chars to encode
 * @param off the index of the first char to encode
 * @param len the number of chars to encode
 * @param writer the destination of the encoded output
 * @throws EncodingException declared by the signature; not thrown directly by this method
 */
public void safeEncode(char[] chars, int off, int len, CharWriter writer) throws EncodingException
{
   // Exclusive end of the requested range
   final int to = off + len;

   // Start of the pending run of chars that were not yet written
   int previous = off;

   // Perform lookup char by char
   for (int current = off; current < to; current++)
   {
      String replacement = lookup(chars[current]);
      if (replacement != null)
      {
         // Flush the unreplaced chars that precede this one, if any
         writer.append(chars, previous, current - previous);

         // Append the entity in place of the char
         writer.append('&').append(replacement).append(';');

         // The next pending run starts right after the replaced char
         previous = current + 1;
      }
   }

   // Flush the tail. The run must stop at the end of the requested range and not at the end of the
   // array, otherwise chars located after off + len would leak into the output.
   writer.append(chars, previous, to - previous);
}
/**
 * Encoder holding every entity registered by the {@link EntityEncoder} constructor, including the
 * entities of the HTML markup chars.
 */
public static final EntityEncoder FULL = new EntityEncoder();
/**
 * Encoder holding all entities except those of the chars used to write HTML itself: the
 * {@code BasicEntityTable} constructor removes {@code <}, {@code >}, {@code "} and {@code &}.
 */
public static final EntityEncoder BASIC = new BasicEntityTable();
/**
 * Entity table from which the chars used by HTML markup ({@code <}, {@code >}, {@code "} and
 * {@code &}) have been removed, so that they are written as is.
 */
private static class BasicEntityTable extends EntityEncoder
{
   public BasicEntityTable()
   {
      // The parent constructor has registered the whole table; drop the markup chars from it
      final char[] markupChars = {'<', '>', '"', '&'};
      for (char markupChar : markupChars)
      {
         remove(markupChar);
      }
   }
}
// Two hash maps are used, one per lookup direction, which is similar to what a BidiMap from
// commons-collections would give us, but without introducing that dependency. The disadvantage is that
// there are two maps to maintain, but a reverse lookup is needed and the table is quite small
// (about 255 entries when this was introduced), so it is a small price to pay.

/** Maps a char to the name of its entity, for instance char 231 to {@code "ccedil"}. */
private final Map<Character, String> charMap = new HashMap<Character, String>(255);

/** Maps the name of an entity back to its char; kept in sync with {@link #charMap}. */
private final Map<String, Character> inverseCharMap = new HashMap<String, Character>(255);
/**
 * Builds the complete entity table: each {@code put} call below registers one char code together
 * with the name of its entity, in both lookup directions.
 *
 * <p>The constructor is protected; in this file instances are created by the {@code FULL} and
 * {@code BASIC} constants, and a subclass may call {@code remove(int)} afterwards to drop entries.</p>
 */
protected EntityEncoder()
{
// Code points 160-255: accented letters, currency signs, fractions and other Latin-1 symbols
put(160, "nbsp");
put(161, "iexcl");
put(162, "cent");
put(163, "pound");
put(164, "curren");
put(165, "yen");
put(166, "brvbar");
put(167, "sect");
put(168, "uml");
put(169, "copy");
put(170, "ordf");
put(171, "laquo");
put(172, "not");
put(173, "shy");
put(174, "reg");
put(175, "macr");
put(176, "deg");
put(177, "plusmn");
put(178, "sup2");
put(179, "sup3");
put(180, "acute");
put(181, "micro");
put(182, "para");
put(183, "middot");
put(184, "cedil");
put(185, "sup1");
put(186, "ordm");
put(187, "raquo");
put(188, "frac14");
put(189, "frac12");
put(190, "frac34");
put(191, "iquest");
put(192, "Agrave");
put(193, "Aacute");
put(194, "Acirc");
put(195, "Atilde");
put(196, "Auml");
put(197, "Aring");
put(198, "AElig");
put(199, "Ccedil");
put(200, "Egrave");
put(201, "Eacute");
put(202, "Ecirc");
put(203, "Euml");
put(204, "Igrave");
put(205, "Iacute");
put(206, "Icirc");
put(207, "Iuml");
put(208, "ETH");
put(209, "Ntilde");
put(210, "Ograve");
put(211, "Oacute");
put(212, "Ocirc");
put(213, "Otilde");
put(214, "Ouml");
put(215, "times");
put(216, "Oslash");
put(217, "Ugrave");
put(218, "Uacute");
put(219, "Ucirc");
put(220, "Uuml");
put(221, "Yacute");
put(222, "THORN");
put(223, "szlig");
put(224, "agrave");
put(225, "aacute");
put(226, "acirc");
put(227, "atilde");
put(228, "auml");
put(229, "aring");
put(230, "aelig");
put(231, "ccedil");
put(232, "egrave");
put(233, "eacute");
put(234, "ecirc");
put(235, "euml");
put(236, "igrave");
put(237, "iacute");
put(238, "icirc");
put(239, "iuml");
put(240, "eth");
put(241, "ntilde");
put(242, "ograve");
put(243, "oacute");
put(244, "ocirc");
put(245, "otilde");
put(246, "ouml");
put(247, "divide");
put(248, "oslash");
put(249, "ugrave");
put(250, "uacute");
put(251, "ucirc");
put(252, "uuml");
put(253, "yacute");
put(254, "thorn");
put(255, "yuml");
// Function sign and Greek letters (upper case, lower case, then variant forms)
put(402, "fnof");
put(913, "Alpha");
put(914, "Beta");
put(915, "Gamma");
put(916, "Delta");
put(917, "Epsilon");
put(918, "Zeta");
put(919, "Eta");
put(920, "Theta");
put(921, "Iota");
put(922, "Kappa");
put(923, "Lambda");
put(924, "Mu");
put(925, "Nu");
put(926, "Xi");
put(927, "Omicron");
put(928, "Pi");
put(929, "Rho");
put(931, "Sigma");
put(932, "Tau");
put(933, "Upsilon");
put(934, "Phi");
put(935, "Chi");
put(936, "Psi");
put(937, "Omega");
put(945, "alpha");
put(946, "beta");
put(947, "gamma");
put(948, "delta");
put(949, "epsilon");
put(950, "zeta");
put(951, "eta");
put(952, "theta");
put(953, "iota");
put(954, "kappa");
put(955, "lambda");
put(956, "mu");
put(957, "nu");
put(958, "xi");
put(959, "omicron");
put(960, "pi");
put(961, "rho");
put(962, "sigmaf");
put(963, "sigma");
put(964, "tau");
put(965, "upsilon");
put(966, "phi");
put(967, "chi");
put(968, "psi");
put(969, "omega");
put(977, "thetasym");
put(978, "upsih");
put(982, "piv");
// Bullet, ellipsis, primes and letter-like symbols
put(8226, "bull");
put(8230, "hellip");
put(8242, "prime");
put(8243, "Prime");
put(8254, "oline");
put(8260, "frasl");
put(8472, "weierp");
put(8465, "image");
put(8476, "real");
put(8482, "trade");
put(8501, "alefsym");
// Arrows
put(8592, "larr");
put(8593, "uarr");
put(8594, "rarr");
put(8595, "darr");
put(8596, "harr");
put(8629, "crarr");
put(8656, "lArr");
put(8657, "uArr");
put(8658, "rArr");
put(8659, "dArr");
put(8660, "hArr");
// Mathematical operators
put(8704, "forall");
put(8706, "part");
put(8707, "exist");
put(8709, "empty");
put(8711, "nabla");
put(8712, "isin");
put(8713, "notin");
put(8715, "ni");
put(8719, "prod");
put(8721, "sum");
put(8722, "minus");
put(8727, "lowast");
put(8730, "radic");
put(8733, "prop");
put(8734, "infin");
put(8736, "ang");
put(8743, "and");
put(8744, "or");
put(8745, "cap");
put(8746, "cup");
put(8747, "int");
put(8756, "there4");
put(8764, "sim");
put(8773, "cong");
put(8776, "asymp");
put(8800, "ne");
put(8801, "equiv");
put(8804, "le");
put(8805, "ge");
put(8834, "sub");
put(8835, "sup");
put(8836, "nsub");
put(8838, "sube");
put(8839, "supe");
put(8853, "oplus");
put(8855, "otimes");
put(8869, "perp");
put(8901, "sdot");
// Ceiling/floor and angle brackets, lozenge and card suits
put(8968, "lceil");
put(8969, "rceil");
put(8970, "lfloor");
put(8971, "rfloor");
put(9001, "lang");
put(9002, "rang");
put(9674, "loz");
put(9824, "spades");
put(9827, "clubs");
put(9829, "hearts");
put(9830, "diams");
// Chars that are significant in HTML markup; BasicEntityTable removes these four again
put(34, "quot");
put(38, "amp");
put(60, "lt");
put(62, "gt");
// Additional Latin letters and spacing modifiers
put(338, "OElig");
put(339, "oelig");
put(352, "Scaron");
put(353, "scaron");
put(376, "Yuml");
put(710, "circ");
put(732, "tilde");
// Spaces, joiners and directional marks, dashes, quotation marks and the euro sign
put(8194, "ensp");
put(8195, "emsp");
put(8201, "thinsp");
put(8204, "zwnj");
put(8205, "zwj");
put(8206, "lrm");
put(8207, "rlm");
put(8211, "ndash");
put(8212, "mdash");
put(8216, "lsquo");
put(8217, "rsquo");
put(8218, "sbquo");
put(8220, "ldquo");
put(8221, "rdquo");
put(8222, "bdquo");
put(8224, "dagger");
put(8225, "Dagger");
put(8240, "permil");
put(8249, "lsaquo");
put(8250, "rsaquo");
put(8364, "euro");
}
/**
 * Registers an entity in both lookup directions: char to name and name to char.
 *
 * @param c the code of the char, narrowed to a {@code char} before being stored
 * @param entity the name of the entity, without the surrounding {@code &} and {@code ;}
 */
protected final void put(int c, String entity)
{
   // Box the key once and share it between the two maps
   final Character key = Character.valueOf((char) c);
   charMap.put(key, entity);
   inverseCharMap.put(entity, key);
}
/**
 * Unregisters the entity of a char from both lookup directions.
 *
 * @param c the code of the char, narrowed to a {@code char} before the lookup
 */
protected final void remove(int c)
{
   final Character key = Character.valueOf((char) c);

   // Removing the forward entry hands back the entity name (or null when the char was not
   // registered), which is the key to drop from the inverse map
   inverseCharMap.remove(charMap.remove(key));
}
/**
 * Looks up the entity name registered for a char.
 *
 * @param c the char to encode
 * @return the name of the entity, without the surrounding {@code &} and {@code ;}, or null when
 *         no entity is registered for the char
 */
public final String lookup(char c)
{
   final String entity = charMap.get(Character.valueOf(c));
   return entity;
}
/**
 * Looks up the char registered for an entity name. For instance, {@code ccedil} as input returns
 * 231, the code of ç.
 *
 * @param s the name of the entity to convert back into its char
 * @return the code of the char, or -1 when the name is not registered
 */
public final int reverse(String s)
{
   final Character match = inverseCharMap.get(s);
   if (match == null)
   {
      return -1;
   }
   return match.charValue();
}
}