Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
com.yahoo.text.HTML Maven / Gradle / Ivy
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.text;
import java.util.Map;
import java.util.HashMap;
/**
 * Static HTML escaping utilities.
 *
 * <p>The class holds a table of named HTML character entities and two lookup
 * maps built from it at class-initialization time: entity name to code point
 * ({@code e2i}) and code point to entity name ({@code i2e}).
 * {@link #htmlescape(String)} uses the latter to produce escaped output that
 * contains only ASCII characters.
 *
 * @author Bjorn Borud
 */
public class HTML {

    /**
     * Table of {@code {entity name (String), code point (Integer)}} pairs.
     *
     * <p>The apostrophe (39) has no entry here (a commented-out entry for it
     * existed previously), so {@link #htmlescape(String)} emits it unescaped.
     */
    static Object[][] entities = {
        {"quot", 34}, // " - double-quote
        {"amp", 38}, // & - ampersand
        {"lt", 60}, // < - less-than
        {"gt", 62}, // > - greater-than
        {"nbsp", 160}, // non-breaking space
        {"copy", 169}, // \u00A9 - copyright
        {"reg", 174}, // \u00AE - registered trademark
        {"Agrave", 192}, // \u00C0 - uppercase A, grave accent
        {"Aacute", 193}, // \u00C1 - uppercase A, acute accent
        {"Acirc", 194}, // \u00C2 - uppercase A, circumflex accent
        {"Atilde", 195}, // \u00C3 - uppercase A, tilde
        {"Auml", 196}, // \u00C4 - uppercase A, umlaut
        {"Aring", 197}, // \u00C5 - uppercase A, ring
        {"AElig", 198}, // \u00C6 - uppercase AE
        {"Ccedil", 199}, // \u00C7 - uppercase C, cedilla
        {"Egrave", 200}, // \u00C8 - uppercase E, grave accent
        {"Eacute", 201}, // \u00C9 - uppercase E, acute accent
        {"Ecirc", 202}, // \u00CA - uppercase E, circumflex accent
        {"Euml", 203}, // \u00CB - uppercase E, umlaut
        {"Igrave", 204}, // \u00CC - uppercase I, grave accent
        {"Iacute", 205}, // \u00CD - uppercase I, acute accent
        {"Icirc", 206}, // \u00CE - uppercase I, circumflex accent
        {"Iuml", 207}, // \u00CF - uppercase I, umlaut
        {"ETH", 208}, // \u00D0 - uppercase Eth, Icelandic
        {"Ntilde", 209}, // \u00D1 - uppercase N, tilde
        {"Ograve", 210}, // \u00D2 - uppercase O, grave accent
        {"Oacute", 211}, // \u00D3 - uppercase O, acute accent
        {"Ocirc", 212}, // \u00D4 - uppercase O, circumflex accent
        {"Otilde", 213}, // \u00D5 - uppercase O, tilde
        {"Ouml", 214}, // \u00D6 - uppercase O, umlaut
        {"Oslash", 216}, // \u00D8 - uppercase O, slash
        {"Ugrave", 217}, // \u00D9 - uppercase U, grave accent
        {"Uacute", 218}, // \u00DA - uppercase U, acute accent
        {"Ucirc", 219}, // \u00DB - uppercase U, circumflex accent
        {"Uuml", 220}, // \u00DC - uppercase U, umlaut
        {"Yacute", 221}, // \u00DD - uppercase Y, acute accent
        {"THORN", 222}, // \u00DE - uppercase THORN, Icelandic
        {"szlig", 223}, // \u00DF - lowercase sharps, German
        {"agrave", 224}, // \u00E0 - lowercase a, grave accent
        {"aacute", 225}, // \u00E1 - lowercase a, acute accent
        {"acirc", 226}, // \u00E2 - lowercase a, circumflex accent
        {"atilde", 227}, // \u00E3 - lowercase a, tilde
        {"auml", 228}, // \u00E4 - lowercase a, umlaut
        {"aring", 229}, // \u00E5 - lowercase a, ring
        {"aelig", 230}, // \u00E6 - lowercase ae
        {"ccedil", 231}, // \u00E7 - lowercase c, cedilla
        {"egrave", 232}, // \u00E8 - lowercase e, grave accent
        {"eacute", 233}, // \u00E9 - lowercase e, acute accent
        {"ecirc", 234}, // \u00EA - lowercase e, circumflex accent
        {"euml", 235}, // \u00EB - lowercase e, umlaut
        {"igrave", 236}, // \u00EC - lowercase i, grave accent
        {"iacute", 237}, // \u00ED - lowercase i, acute accent
        {"icirc", 238}, // \u00EE - lowercase i, circumflex accent
        {"iuml", 239}, // \u00EF - lowercase i, umlaut
        {"eth", 240}, // \u00F0 - lowercase eth, Icelandic
        {"ntilde", 241}, // \u00F1 - lowercase n, tilde
        {"ograve", 242}, // \u00F2 - lowercase o, grave accent
        {"oacute", 243}, // \u00F3 - lowercase o, acute accent
        {"ocirc", 244}, // \u00F4 - lowercase o, circumflex accent
        {"otilde", 245}, // \u00F5 - lowercase o, tilde
        {"ouml", 246}, // \u00F6 - lowercase o, umlaut
        {"oslash", 248}, // \u00F8 - lowercase o, slash
        {"ugrave", 249}, // \u00F9 - lowercase u, grave accent
        {"uacute", 250}, // \u00FA - lowercase u, acute accent
        {"ucirc", 251}, // \u00FB - lowercase u, circumflex accent
        {"uuml", 252}, // \u00FC - lowercase u, umlaut
        {"yacute", 253}, // \u00FD - lowercase y, acute accent
        {"thorn", 254}, // \u00FE - lowercase thorn, Icelandic
        {"yuml", 255}, // \u00FF - lowercase y, umlaut
        {"euro", 8364}, // Euro symbol
    };

    /** Entity name (without the surrounding {@code &} and {@code ;}) to code point. */
    static Map<String, Integer> e2i = new HashMap<>();

    /** Code point to entity name (without the surrounding {@code &} and {@code ;}). */
    static Map<Integer, String> i2e = new HashMap<>();

    static {
        // Populate both lookup directions from the single entity table.
        for (Object[] entity : entities) {
            String name = (String) entity[0];
            Integer codePoint = (Integer) entity[1];
            e2i.put(name, codePoint);
            i2e.put(codePoint, name);
        }
    }

    /**
     * Escapes a string for inclusion in HTML.
     *
     * <p>Each code point listed in the entity table is replaced by its named
     * entity (for example {@code <} becomes {@code &lt;}). Any other non-ASCII
     * code point is replaced by a decimal numeric character reference
     * ({@code &#NNN;}). Remaining ASCII characters, including the apostrophe,
     * are copied unchanged.
     *
     * @param s1 the string to escape; may be {@code null}
     * @return the escaped string, or the empty string if {@code s1} is {@code null}
     */
    public static String htmlescape(String s1) {
        if (s1 == null) {
            return "";
        }
        int len = s1.length();
        // Guess that escaping grows the text by about 20%.
        StringBuilder buf = new StringBuilder((int) (len * 1.2));
        // Walk by code point rather than by char so that a surrogate pair is
        // emitted as a single reference to the supplementary character instead
        // of two references to the individual surrogates.
        for (int i = 0; i < len; ) {
            int codePoint = s1.codePointAt(i);
            i += Character.charCount(codePoint);
            String entity = i2e.get(codePoint);
            if (entity != null) {
                buf.append('&').append(entity).append(';');
            } else if (codePoint > 127) {
                // Everything outside ASCII becomes a numeric character reference.
                buf.append("&#").append(codePoint).append(';');
            } else {
                buf.append((char) codePoint);
            }
        }
        return buf.toString();
    }
}