facebook4j.internal.http.HTMLEntity Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of facebook4j-core Show documentation
Show all versions of facebook4j-core Show documentation
A Java library for the Facebook Graph API
/*
* Copyright 2007 Yusuke Yamamoto
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package facebook4j.internal.http;
import java.util.HashMap;
import java.util.Map;
/**
 * Converts between literal characters and HTML 4 character entity references.
 *
 * <p>{@link #escape(String)} replaces every character that has a named HTML 4 entity
 * (for example {@code <}, {@code &} or U+00A9) with that named entity.
 * {@link #unescape(String)} performs the reverse mapping and accepts both the named
 * form ({@code &copy;}) and the decimal numeric form ({@code &#169;}) of the entities
 * listed in the table below. References that are not in the table (including
 * hexadecimal references and arbitrary numeric references) are left untouched.
 *
 * <p>The lookup tables are populated once in the static initializer and are only read
 * afterwards.
 */
public final class HTMLEntity {

    /** Literal character (as a one-character string) to named entity, e.g. {@code "<"} to {@code "&lt;"}. */
    private static final Map<String, String> entityEscapeMap = new HashMap<String, String>();

    /** Named or decimal numeric entity to literal character, e.g. {@code "&lt;"} and {@code "&#60;"} to {@code "<"}. */
    private static final Map<String, String> escapeEntityMap = new HashMap<String, String>();

    static {
        // Each row is {named entity, literal character}. The decimal numeric entity
        // ("&#NNN;") is derived from the character's code point in the loop below, so it
        // cannot drift out of sync with the character column.
        String[][] entities = {
            {"&nbsp;", "\u00A0"}, // no-break space = non-breaking space
            {"&iexcl;", "\u00A1"}, // inverted exclamation mark
            {"&cent;", "\u00A2"}, // cent sign
            {"&pound;", "\u00A3"}, // pound sign
            {"&curren;", "\u00A4"}, // currency sign
            {"&yen;", "\u00A5"}, // yen sign = yuan sign
            {"&brvbar;", "\u00A6"}, // broken bar = broken vertical bar
            {"&sect;", "\u00A7"}, // section sign
            {"&uml;", "\u00A8"}, // diaeresis = spacing diaeresis
            {"&copy;", "\u00A9"}, // copyright sign
            {"&ordf;", "\u00AA"}, // feminine ordinal indicator
            {"&laquo;", "\u00AB"}, // left-pointing double angle quotation mark = left pointing guillemet
            {"&not;", "\u00AC"}, // not sign = discretionary hyphen
            {"&shy;", "\u00AD"}, // soft hyphen = discretionary hyphen
            {"&reg;", "\u00AE"}, // registered sign = registered trade mark sign
            {"&macr;", "\u00AF"}, // macron = spacing macron = overline = APL overbar
            {"&deg;", "\u00B0"}, // degree sign
            {"&plusmn;", "\u00B1"}, // plus-minus sign = plus-or-minus sign
            {"&sup2;", "\u00B2"}, // superscript two = superscript digit two = squared
            {"&sup3;", "\u00B3"}, // superscript three = superscript digit three = cubed
            {"&acute;", "\u00B4"}, // acute accent = spacing acute
            {"&micro;", "\u00B5"}, // micro sign
            {"&para;", "\u00B6"}, // pilcrow sign = paragraph sign
            {"&middot;", "\u00B7"}, // middle dot = Georgian comma = Greek middle dot
            {"&cedil;", "\u00B8"}, // cedilla = spacing cedilla
            {"&sup1;", "\u00B9"}, // superscript one = superscript digit one
            {"&ordm;", "\u00BA"}, // masculine ordinal indicator
            {"&raquo;", "\u00BB"}, // right-pointing double angle quotation mark = right pointing guillemet
            {"&frac14;", "\u00BC"}, // vulgar fraction one quarter = fraction one quarter
            {"&frac12;", "\u00BD"}, // vulgar fraction one half = fraction one half
            {"&frac34;", "\u00BE"}, // vulgar fraction three quarters = fraction three quarters
            {"&iquest;", "\u00BF"}, // inverted question mark = turned question mark
            {"&Agrave;", "\u00C0"}, // latin capital letter A with grave = latin capital letter A grave
            {"&Aacute;", "\u00C1"}, // latin capital letter A with acute
            {"&Acirc;", "\u00C2"}, // latin capital letter A with circumflex
            {"&Atilde;", "\u00C3"}, // latin capital letter A with tilde
            {"&Auml;", "\u00C4"}, // latin capital letter A with diaeresis
            {"&Aring;", "\u00C5"}, // latin capital letter A with ring above = latin capital letter A ring
            {"&AElig;", "\u00C6"}, // latin capital letter AE = latin capital ligature AE
            {"&Ccedil;", "\u00C7"}, // latin capital letter C with cedilla
            {"&Egrave;", "\u00C8"}, // latin capital letter E with grave
            {"&Eacute;", "\u00C9"}, // latin capital letter E with acute
            {"&Ecirc;", "\u00CA"}, // latin capital letter E with circumflex
            {"&Euml;", "\u00CB"}, // latin capital letter E with diaeresis
            {"&Igrave;", "\u00CC"}, // latin capital letter I with grave
            {"&Iacute;", "\u00CD"}, // latin capital letter I with acute
            {"&Icirc;", "\u00CE"}, // latin capital letter I with circumflex
            {"&Iuml;", "\u00CF"}, // latin capital letter I with diaeresis
            {"&ETH;", "\u00D0"}, // latin capital letter ETH
            {"&Ntilde;", "\u00D1"}, // latin capital letter N with tilde
            {"&Ograve;", "\u00D2"}, // latin capital letter O with grave
            {"&Oacute;", "\u00D3"}, // latin capital letter O with acute
            {"&Ocirc;", "\u00D4"}, // latin capital letter O with circumflex
            {"&Otilde;", "\u00D5"}, // latin capital letter O with tilde
            {"&Ouml;", "\u00D6"}, // latin capital letter O with diaeresis
            {"&times;", "\u00D7"}, // multiplication sign
            {"&Oslash;", "\u00D8"}, // latin capital letter O with stroke = latin capital letter O slash
            {"&Ugrave;", "\u00D9"}, // latin capital letter U with grave
            {"&Uacute;", "\u00DA"}, // latin capital letter U with acute
            {"&Ucirc;", "\u00DB"}, // latin capital letter U with circumflex
            {"&Uuml;", "\u00DC"}, // latin capital letter U with diaeresis
            {"&Yacute;", "\u00DD"}, // latin capital letter Y with acute
            {"&THORN;", "\u00DE"}, // latin capital letter THORN
            {"&szlig;", "\u00DF"}, // latin small letter sharp s = ess-zed
            {"&agrave;", "\u00E0"}, // latin small letter a with grave = latin small letter a grave
            {"&aacute;", "\u00E1"}, // latin small letter a with acute
            {"&acirc;", "\u00E2"}, // latin small letter a with circumflex
            {"&atilde;", "\u00E3"}, // latin small letter a with tilde
            {"&auml;", "\u00E4"}, // latin small letter a with diaeresis
            {"&aring;", "\u00E5"}, // latin small letter a with ring above = latin small letter a ring
            {"&aelig;", "\u00E6"}, // latin small letter ae = latin small ligature ae
            {"&ccedil;", "\u00E7"}, // latin small letter c with cedilla
            {"&egrave;", "\u00E8"}, // latin small letter e with grave
            {"&eacute;", "\u00E9"}, // latin small letter e with acute
            {"&ecirc;", "\u00EA"}, // latin small letter e with circumflex
            {"&euml;", "\u00EB"}, // latin small letter e with diaeresis
            {"&igrave;", "\u00EC"}, // latin small letter i with grave
            {"&iacute;", "\u00ED"}, // latin small letter i with acute
            {"&icirc;", "\u00EE"}, // latin small letter i with circumflex
            {"&iuml;", "\u00EF"}, // latin small letter i with diaeresis
            {"&eth;", "\u00F0"}, // latin small letter eth
            {"&ntilde;", "\u00F1"}, // latin small letter n with tilde
            {"&ograve;", "\u00F2"}, // latin small letter o with grave
            {"&oacute;", "\u00F3"}, // latin small letter o with acute
            {"&ocirc;", "\u00F4"}, // latin small letter o with circumflex
            {"&otilde;", "\u00F5"}, // latin small letter o with tilde
            {"&ouml;", "\u00F6"}, // latin small letter o with diaeresis
            {"&divide;", "\u00F7"}, // division sign
            {"&oslash;", "\u00F8"}, // latin small letter o with stroke = latin small letter o slash
            {"&ugrave;", "\u00F9"}, // latin small letter u with grave
            {"&uacute;", "\u00FA"}, // latin small letter u with acute
            {"&ucirc;", "\u00FB"}, // latin small letter u with circumflex
            {"&uuml;", "\u00FC"}, // latin small letter u with diaeresis
            {"&yacute;", "\u00FD"}, // latin small letter y with acute
            {"&thorn;", "\u00FE"}, // latin small letter thorn
            {"&yuml;", "\u00FF"}, // latin small letter y with diaeresis
            {"&fnof;", "\u0192"}, // latin small f with hook = function = florin

            // Greek
            {"&Alpha;", "\u0391"}, // greek capital letter alpha
            {"&Beta;", "\u0392"}, // greek capital letter beta
            {"&Gamma;", "\u0393"}, // greek capital letter gamma
            {"&Delta;", "\u0394"}, // greek capital letter delta
            {"&Epsilon;", "\u0395"}, // greek capital letter epsilon
            {"&Zeta;", "\u0396"}, // greek capital letter zeta
            {"&Eta;", "\u0397"}, // greek capital letter eta
            {"&Theta;", "\u0398"}, // greek capital letter theta
            {"&Iota;", "\u0399"}, // greek capital letter iota
            {"&Kappa;", "\u039A"}, // greek capital letter kappa
            {"&Lambda;", "\u039B"}, // greek capital letter lambda
            {"&Mu;", "\u039C"}, // greek capital letter mu
            {"&Nu;", "\u039D"}, // greek capital letter nu
            {"&Xi;", "\u039E"}, // greek capital letter xi
            {"&Omicron;", "\u039F"}, // greek capital letter omicron
            {"&Pi;", "\u03A0"}, // greek capital letter pi
            {"&Rho;", "\u03A1"}, // greek capital letter rho
            // there is no Sigmaf and no character at U+03A2
            {"&Sigma;", "\u03A3"}, // greek capital letter sigma
            {"&Tau;", "\u03A4"}, // greek capital letter tau
            {"&Upsilon;", "\u03A5"}, // greek capital letter upsilon
            {"&Phi;", "\u03A6"}, // greek capital letter phi
            {"&Chi;", "\u03A7"}, // greek capital letter chi
            {"&Psi;", "\u03A8"}, // greek capital letter psi
            {"&Omega;", "\u03A9"}, // greek capital letter omega
            {"&alpha;", "\u03B1"}, // greek small letter alpha
            {"&beta;", "\u03B2"}, // greek small letter beta
            {"&gamma;", "\u03B3"}, // greek small letter gamma
            {"&delta;", "\u03B4"}, // greek small letter delta
            {"&epsilon;", "\u03B5"}, // greek small letter epsilon
            {"&zeta;", "\u03B6"}, // greek small letter zeta
            {"&eta;", "\u03B7"}, // greek small letter eta
            {"&theta;", "\u03B8"}, // greek small letter theta
            {"&iota;", "\u03B9"}, // greek small letter iota
            {"&kappa;", "\u03BA"}, // greek small letter kappa
            {"&lambda;", "\u03BB"}, // greek small letter lambda
            {"&mu;", "\u03BC"}, // greek small letter mu
            {"&nu;", "\u03BD"}, // greek small letter nu
            {"&xi;", "\u03BE"}, // greek small letter xi
            {"&omicron;", "\u03BF"}, // greek small letter omicron
            {"&pi;", "\u03C0"}, // greek small letter pi
            {"&rho;", "\u03C1"}, // greek small letter rho
            {"&sigmaf;", "\u03C2"}, // greek small letter final sigma
            {"&sigma;", "\u03C3"}, // greek small letter sigma
            {"&tau;", "\u03C4"}, // greek small letter tau
            {"&upsilon;", "\u03C5"}, // greek small letter upsilon
            {"&phi;", "\u03C6"}, // greek small letter phi
            {"&chi;", "\u03C7"}, // greek small letter chi
            {"&psi;", "\u03C8"}, // greek small letter psi
            {"&omega;", "\u03C9"}, // greek small letter omega
            {"&thetasym;", "\u03D1"}, // greek small letter theta symbol
            {"&upsih;", "\u03D2"}, // greek upsilon with hook symbol
            {"&piv;", "\u03D6"}, // greek pi symbol

            // General Punctuation
            {"&bull;", "\u2022"}, // bullet = black small circle
            // bullet is NOT the same as bullet operator, U+2219
            {"&hellip;", "\u2026"}, // horizontal ellipsis = three dot leader
            {"&prime;", "\u2032"}, // prime = minutes = feet
            {"&Prime;", "\u2033"}, // double prime = seconds = inches
            {"&oline;", "\u203E"}, // overline = spacing overscore
            {"&frasl;", "\u2044"}, // fraction slash

            // Letterlike Symbols
            {"&weierp;", "\u2118"}, // script capital P = power set = Weierstrass p
            {"&image;", "\u2111"}, // blackletter capital I = imaginary part
            {"&real;", "\u211C"}, // blackletter capital R = real part symbol
            {"&trade;", "\u2122"}, // trade mark sign
            {"&alefsym;", "\u2135"}, // alef symbol = first transfinite cardinal
            // alef symbol is NOT the same as hebrew letter alef, U+05D0

            // Arrows
            {"&larr;", "\u2190"}, // leftwards arrow
            {"&uarr;", "\u2191"}, // upwards arrow
            {"&rarr;", "\u2192"}, // rightwards arrow
            {"&darr;", "\u2193"}, // downwards arrow
            {"&harr;", "\u2194"}, // left right arrow
            {"&crarr;", "\u21B5"}, // downwards arrow with corner leftwards = carriage return
            {"&lArr;", "\u21D0"}, // leftwards double arrow
            // Unicode does not say that lArr is the same as the 'is implied by' arrow but also
            // does not have any other character for that function, so lArr can be used for
            // 'is implied by' as ISOtech suggests
            {"&uArr;", "\u21D1"}, // upwards double arrow
            {"&rArr;", "\u21D2"}, // rightwards double arrow
            // Unicode does not say this is the 'implies' character but does not have another
            // character with this function, so rArr can be used for 'implies' as ISOtech suggests
            {"&dArr;", "\u21D3"}, // downwards double arrow
            {"&hArr;", "\u21D4"}, // left right double arrow

            // Mathematical Operators
            {"&forall;", "\u2200"}, // for all
            {"&part;", "\u2202"}, // partial differential
            {"&exist;", "\u2203"}, // there exists
            {"&empty;", "\u2205"}, // empty set = null set = diameter
            {"&nabla;", "\u2207"}, // nabla = backward difference
            {"&isin;", "\u2208"}, // element of
            {"&notin;", "\u2209"}, // not an element of
            {"&ni;", "\u220B"}, // contains as member
            // should there be a more memorable name than 'ni'?
            {"&prod;", "\u220F"}, // n-ary product = product sign
            // prod is NOT the same character as U+03A0 (greek capital letter pi)
            {"&sum;", "\u2211"}, // n-ary summation
            // sum is NOT the same character as U+03A3 (greek capital letter sigma)
            {"&minus;", "\u2212"}, // minus sign
            {"&lowast;", "\u2217"}, // asterisk operator
            {"&radic;", "\u221A"}, // square root = radical sign
            {"&prop;", "\u221D"}, // proportional to
            {"&infin;", "\u221E"}, // infinity
            {"&ang;", "\u2220"}, // angle
            {"&and;", "\u2227"}, // logical and = wedge
            {"&or;", "\u2228"}, // logical or = vee
            {"&cap;", "\u2229"}, // intersection = cap
            {"&cup;", "\u222A"}, // union = cup
            {"&int;", "\u222B"}, // integral
            {"&there4;", "\u2234"}, // therefore
            {"&sim;", "\u223C"}, // tilde operator = varies with = similar to
            // tilde operator is NOT the same character as the tilde, U+007E
            {"&cong;", "\u2245"}, // approximately equal to
            {"&asymp;", "\u2248"}, // almost equal to = asymptotic to
            {"&ne;", "\u2260"}, // not equal to
            {"&equiv;", "\u2261"}, // identical to
            {"&le;", "\u2264"}, // less-than or equal to
            {"&ge;", "\u2265"}, // greater-than or equal to
            {"&sub;", "\u2282"}, // subset of
            {"&sup;", "\u2283"}, // superset of
            {"&sube;", "\u2286"}, // subset of or equal to
            {"&supe;", "\u2287"}, // superset of or equal to
            {"&oplus;", "\u2295"}, // circled plus = direct sum
            {"&otimes;", "\u2297"}, // circled times = vector product
            {"&perp;", "\u22A5"}, // up tack = orthogonal to = perpendicular
            {"&sdot;", "\u22C5"}, // dot operator
            // dot operator is NOT the same character as U+00B7 (middle dot)

            // Miscellaneous Technical
            {"&lceil;", "\u2308"}, // left ceiling = apl upstile
            {"&rceil;", "\u2309"}, // right ceiling
            {"&lfloor;", "\u230A"}, // left floor = apl downstile
            {"&rfloor;", "\u230B"}, // right floor
            {"&lang;", "\u2329"}, // left-pointing angle bracket = bra
            // lang is NOT the same character as U+003C (less-than sign)
            {"&rang;", "\u232A"}, // right-pointing angle bracket = ket
            // rang is NOT the same character as U+003E (greater-than sign)

            // Geometric Shapes
            {"&loz;", "\u25CA"}, // lozenge

            // Miscellaneous Symbols
            {"&spades;", "\u2660"}, // black spade suit
            // black here seems to mean filled as opposed to hollow
            {"&clubs;", "\u2663"}, // black club suit = shamrock
            {"&hearts;", "\u2665"}, // black heart suit = valentine
            {"&diams;", "\u2666"}, // black diamond suit

            // Markup-significant characters
            {"&quot;", "\""}, // quotation mark = APL quote
            {"&amp;", "&"}, // ampersand
            {"&lt;", "<"}, // less-than sign
            {"&gt;", ">"}, // greater-than sign

            // Latin Extended-A
            {"&OElig;", "\u0152"}, // latin capital ligature OE
            {"&oelig;", "\u0153"}, // latin small ligature oe
            // ligature is a misnomer; this is a separate character in some languages
            {"&Scaron;", "\u0160"}, // latin capital letter S with caron
            {"&scaron;", "\u0161"}, // latin small letter s with caron
            {"&Yuml;", "\u0178"}, // latin capital letter Y with diaeresis

            // Spacing Modifier Letters
            {"&circ;", "\u02C6"}, // modifier letter circumflex accent
            {"&tilde;", "\u02DC"}, // small tilde

            // General Punctuation
            {"&ensp;", "\u2002"}, // en space
            {"&emsp;", "\u2003"}, // em space
            {"&thinsp;", "\u2009"}, // thin space
            {"&zwnj;", "\u200C"}, // zero width non-joiner
            {"&zwj;", "\u200D"}, // zero width joiner
            {"&lrm;", "\u200E"}, // left-to-right mark
            {"&rlm;", "\u200F"}, // right-to-left mark
            {"&ndash;", "\u2013"}, // en dash
            {"&mdash;", "\u2014"}, // em dash
            {"&lsquo;", "\u2018"}, // left single quotation mark
            {"&rsquo;", "\u2019"}, // right single quotation mark
            {"&sbquo;", "\u201A"}, // single low-9 quotation mark
            {"&ldquo;", "\u201C"}, // left double quotation mark
            {"&rdquo;", "\u201D"}, // right double quotation mark
            {"&bdquo;", "\u201E"}, // double low-9 quotation mark
            {"&dagger;", "\u2020"}, // dagger
            {"&Dagger;", "\u2021"}, // double dagger
            {"&permil;", "\u2030"}, // per mille sign
            {"&lsaquo;", "\u2039"}, // single left-pointing angle quotation mark
            // lsaquo is proposed but not yet ISO standardized
            {"&rsaquo;", "\u203A"}, // single right-pointing angle quotation mark
            // rsaquo is proposed but not yet ISO standardized
            {"&euro;", "\u20AC"} // euro sign
        };

        for (String[] entity : entities) {
            String named = entity[0];
            String character = entity[1];
            // Every character in the table is a single UTF-16 unit, so charAt(0) is its code point.
            String numeric = "&#" + (int) character.charAt(0) + ";";

            entityEscapeMap.put(character, named);
            escapeEntityMap.put(named, character);
            escapeEntityMap.put(numeric, character);
        }
    }

    /**
     * Returns a copy of the given text in which every character that has a named
     * HTML 4 entity is replaced by that entity.
     *
     * @param original the text to escape; may be {@code null}
     * @return the escaped text, or {@code null} if {@code original} is {@code null}
     */
    public static String escape(String original) {
        // Mirror unescape(String): tolerate null instead of failing inside StringBuilder.
        if (original == null) {
            return null;
        }
        StringBuilder buf = new StringBuilder(original);
        escape(buf);
        return buf.toString();
    }

    /**
     * Escapes the contents of the given buffer in place, replacing every character
     * that has a named HTML 4 entity with that entity.
     *
     * @param original the buffer to modify; must not be {@code null}
     */
    public static void escape(StringBuilder original) {
        int index = 0;
        while (index < original.length()) {
            String escaped = entityEscapeMap.get(String.valueOf(original.charAt(index)));
            if (escaped != null) {
                original.replace(index, index + 1, escaped);
                // Skip the inserted entity so its own '&' is not escaped again.
                index += escaped.length();
            } else {
                index++;
            }
        }
    }

    /**
     * Returns a copy of the given text in which every known named or decimal numeric
     * entity is replaced by the character it denotes.
     *
     * @param original the text to unescape; may be {@code null}
     * @return the unescaped text, or {@code null} if {@code original} is {@code null}
     */
    public static String unescape(String original) {
        if (original == null) {
            return null;
        }
        StringBuilder buf = new StringBuilder(original);
        unescape(buf);
        return buf.toString();
    }

    /**
     * Unescapes the contents of the given buffer in place. Each {@code &...;} span that
     * matches a known entity is replaced by its character; unknown spans are left as is.
     * The result of a replacement is not scanned again, so {@code &amp;lt;} becomes
     * {@code &lt;} rather than {@code <}.
     *
     * @param original the buffer to modify; must not be {@code null}
     */
    public static void unescape(StringBuilder original) {
        int index = 0;
        while (index < original.length()) {
            index = original.indexOf("&", index);
            if (index == -1) {
                break;
            }
            int semicolonIndex = original.indexOf(";", index);
            if (semicolonIndex == -1) {
                // No terminator anywhere after this '&', so no further entity can exist.
                break;
            }
            String entity = escapeEntityMap.get(original.substring(index, semicolonIndex + 1));
            if (entity != null) {
                original.replace(index, semicolonIndex + 1, entity);
            }
            // Step past this '&' (or the character that replaced the entity) either way.
            index++;
        }
    }
}