All downloads are free. The search and download features use the official Maven repository.

org.owasp.html.HtmlEntities Maven / Gradle / Ivy

There is a newer version: 20240325.1
Show newest version
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package org.owasp.html;

import java.util.Map;

import com.google.common.collect.ImmutableMap;

/**
 * Utilities for decoding HTML entities, e.g., {@code &amp;}.
 */
final class HtmlEntities {

  /** A trie that maps entity names to strings of referenced code points. */
  public static final Trie ENTITY_TRIE;

  private static final int LONGEST_ENTITY_NAME;

  static {
    // Source data: https://html.spec.whatwg.org/multipage/named-characters.html
    // More readable: https://html.spec.whatwg.org/entities.json
    String[] pairs = {
      "AElig", "\u00c6",
      "AElig;", "\u00c6",
      "AMP", "\u0026",
      "AMP;", "\u0026",
      "Aacute", "\u00c1",
      "Aacute;", "\u00c1",
      "Abreve;", "\u0102",
      "Acirc", "\u00c2",
      "Acirc;", "\u00c2",
      "Acy;", "\u0410",
      "Afr;", "\ud835\udd04",
      "Agrave", "\u00c0",
      "Agrave;", "\u00c0",
      "Alpha;", "\u0391",
      "Amacr;", "\u0100",
      "And;", "\u2a53",
      "Aogon;", "\u0104",
      "Aopf;", "\ud835\udd38",
      "ApplyFunction;", "\u2061",
      "Aring", "\u00c5",
      "Aring;", "\u00c5",
      "Ascr;", "\ud835\udc9c",
      "Assign;", "\u2254",
      "Atilde", "\u00c3",
      "Atilde;", "\u00c3",
      "Auml", "\u00c4",
      "Auml;", "\u00c4",
      "Backslash;", "\u2216",
      "Barv;", "\u2ae7",
      "Barwed;", "\u2306",
      "Bcy;", "\u0411",
      "Because;", "\u2235",
      "Bernoullis;", "\u212c",
      "Beta;", "\u0392",
      "Bfr;", "\ud835\udd05",
      "Bopf;", "\ud835\udd39",
      "Breve;", "\u02d8",
      "Bscr;", "\u212c",
      "Bumpeq;", "\u224e",
      "CHcy;", "\u0427",
      "COPY", "\u00a9",
      "COPY;", "\u00a9",
      "Cacute;", "\u0106",
      "Cap;", "\u22d2",
      "CapitalDifferentialD;", "\u2145",
      "Cayleys;", "\u212d",
      "Ccaron;", "\u010c",
      "Ccedil", "\u00c7",
      "Ccedil;", "\u00c7",
      "Ccirc;", "\u0108",
      "Cconint;", "\u2230",
      "Cdot;", "\u010a",
      "Cedilla;", "\u00b8",
      "CenterDot;", "\u00b7",
      "Cfr;", "\u212d",
      "Chi;", "\u03a7",
      "CircleDot;", "\u2299",
      "CircleMinus;", "\u2296",
      "CirclePlus;", "\u2295",
      "CircleTimes;", "\u2297",
      "ClockwiseContourIntegral;", "\u2232",
      "CloseCurlyDoubleQuote;", "\u201d",
      "CloseCurlyQuote;", "\u2019",
      "Colon;", "\u2237",
      "Colone;", "\u2a74",
      "Congruent;", "\u2261",
      "Conint;", "\u222f",
      "ContourIntegral;", "\u222e",
      "Copf;", "\u2102",
      "Coproduct;", "\u2210",
      "CounterClockwiseContourIntegral;", "\u2233",
      "Cross;", "\u2a2f",
      "Cscr;", "\ud835\udc9e",
      "Cup;", "\u22d3",
      "CupCap;", "\u224d",
      "DD;", "\u2145",
      "DDotrahd;", "\u2911",
      "DJcy;", "\u0402",
      "DScy;", "\u0405",
      "DZcy;", "\u040f",
      "Dagger;", "\u2021",
      "Darr;", "\u21a1",
      "Dashv;", "\u2ae4",
      "Dcaron;", "\u010e",
      "Dcy;", "\u0414",
      "Del;", "\u2207",
      "Delta;", "\u0394",
      "Dfr;", "\ud835\udd07",
      "DiacriticalAcute;", "\u00b4",
      "DiacriticalDot;", "\u02d9",
      "DiacriticalDoubleAcute;", "\u02dd",
      "DiacriticalGrave;", "\u0060",
      "DiacriticalTilde;", "\u02dc",
      "Diamond;", "\u22c4",
      "DifferentialD;", "\u2146",
      "Dopf;", "\ud835\udd3b",
      "Dot;", "\u00a8",
      "DotDot;", "\u20dc",
      "DotEqual;", "\u2250",
      "DoubleContourIntegral;", "\u222f",
      "DoubleDot;", "\u00a8",
      "DoubleDownArrow;", "\u21d3",
      "DoubleLeftArrow;", "\u21d0",
      "DoubleLeftRightArrow;", "\u21d4",
      "DoubleLeftTee;", "\u2ae4",
      "DoubleLongLeftArrow;", "\u27f8",
      "DoubleLongLeftRightArrow;", "\u27fa",
      "DoubleLongRightArrow;", "\u27f9",
      "DoubleRightArrow;", "\u21d2",
      "DoubleRightTee;", "\u22a8",
      "DoubleUpArrow;", "\u21d1",
      "DoubleUpDownArrow;", "\u21d5",
      "DoubleVerticalBar;", "\u2225",
      "DownArrow;", "\u2193",
      "DownArrowBar;", "\u2913",
      "DownArrowUpArrow;", "\u21f5",
      "DownBreve;", "\u0311",
      "DownLeftRightVector;", "\u2950",
      "DownLeftTeeVector;", "\u295e",
      "DownLeftVector;", "\u21bd",
      "DownLeftVectorBar;", "\u2956",
      "DownRightTeeVector;", "\u295f",
      "DownRightVector;", "\u21c1",
      "DownRightVectorBar;", "\u2957",
      "DownTee;", "\u22a4",
      "DownTeeArrow;", "\u21a7",
      "Downarrow;", "\u21d3",
      "Dscr;", "\ud835\udc9f",
      "Dstrok;", "\u0110",
      "ENG;", "\u014a",
      "ETH", "\u00d0",
      "ETH;", "\u00d0",
      "Eacute", "\u00c9",
      "Eacute;", "\u00c9",
      "Ecaron;", "\u011a",
      "Ecirc", "\u00ca",
      "Ecirc;", "\u00ca",
      "Ecy;", "\u042d",
      "Edot;", "\u0116",
      "Efr;", "\ud835\udd08",
      "Egrave", "\u00c8",
      "Egrave;", "\u00c8",
      "Element;", "\u2208",
      "Emacr;", "\u0112",
      "EmptySmallSquare;", "\u25fb",
      "EmptyVerySmallSquare;", "\u25ab",
      "Eogon;", "\u0118",
      "Eopf;", "\ud835\udd3c",
      "Epsilon;", "\u0395",
      "Equal;", "\u2a75",
      "EqualTilde;", "\u2242",
      "Equilibrium;", "\u21cc",
      "Escr;", "\u2130",
      "Esim;", "\u2a73",
      "Eta;", "\u0397",
      "Euml", "\u00cb",
      "Euml;", "\u00cb",
      "Exists;", "\u2203",
      "ExponentialE;", "\u2147",
      "Fcy;", "\u0424",
      "Ffr;", "\ud835\udd09",
      "FilledSmallSquare;", "\u25fc",
      "FilledVerySmallSquare;", "\u25aa",
      "Fopf;", "\ud835\udd3d",
      "ForAll;", "\u2200",
      "Fouriertrf;", "\u2131",
      "Fscr;", "\u2131",
      "GJcy;", "\u0403",
      "GT", "\u003e",
      "GT;", "\u003e",
      "Gamma;", "\u0393",
      "Gammad;", "\u03dc",
      "Gbreve;", "\u011e",
      "Gcedil;", "\u0122",
      "Gcirc;", "\u011c",
      "Gcy;", "\u0413",
      "Gdot;", "\u0120",
      "Gfr;", "\ud835\udd0a",
      "Gg;", "\u22d9",
      "Gopf;", "\ud835\udd3e",
      "GreaterEqual;", "\u2265",
      "GreaterEqualLess;", "\u22db",
      "GreaterFullEqual;", "\u2267",
      "GreaterGreater;", "\u2aa2",
      "GreaterLess;", "\u2277",
      "GreaterSlantEqual;", "\u2a7e",
      "GreaterTilde;", "\u2273",
      "Gscr;", "\ud835\udca2",
      "Gt;", "\u226b",
      "HARDcy;", "\u042a",
      "Hacek;", "\u02c7",
      "Hat;", "\u005e",
      "Hcirc;", "\u0124",
      "Hfr;", "\u210c",
      "HilbertSpace;", "\u210b",
      "Hopf;", "\u210d",
      "HorizontalLine;", "\u2500",
      "Hscr;", "\u210b",
      "Hstrok;", "\u0126",
      "HumpDownHump;", "\u224e",
      "HumpEqual;", "\u224f",
      "IEcy;", "\u0415",
      "IJlig;", "\u0132",
      "IOcy;", "\u0401",
      "Iacute", "\u00cd",
      "Iacute;", "\u00cd",
      "Icirc", "\u00ce",
      "Icirc;", "\u00ce",
      "Icy;", "\u0418",
      "Idot;", "\u0130",
      "Ifr;", "\u2111",
      "Igrave", "\u00cc",
      "Igrave;", "\u00cc",
      "Im;", "\u2111",
      "Imacr;", "\u012a",
      "ImaginaryI;", "\u2148",
      "Implies;", "\u21d2",
      "Int;", "\u222c",
      "Integral;", "\u222b",
      "Intersection;", "\u22c2",
      "InvisibleComma;", "\u2063",
      "InvisibleTimes;", "\u2062",
      "Iogon;", "\u012e",
      "Iopf;", "\ud835\udd40",
      "Iota;", "\u0399",
      "Iscr;", "\u2110",
      "Itilde;", "\u0128",
      "Iukcy;", "\u0406",
      "Iuml", "\u00cf",
      "Iuml;", "\u00cf",
      "Jcirc;", "\u0134",
      "Jcy;", "\u0419",
      "Jfr;", "\ud835\udd0d",
      "Jopf;", "\ud835\udd41",
      "Jscr;", "\ud835\udca5",
      "Jsercy;", "\u0408",
      "Jukcy;", "\u0404",
      "KHcy;", "\u0425",
      "KJcy;", "\u040c",
      "Kappa;", "\u039a",
      "Kcedil;", "\u0136",
      "Kcy;", "\u041a",
      "Kfr;", "\ud835\udd0e",
      "Kopf;", "\ud835\udd42",
      "Kscr;", "\ud835\udca6",
      "LJcy;", "\u0409",
      "LT", "\u003c",
      "LT;", "\u003c",
      "Lacute;", "\u0139",
      "Lambda;", "\u039b",
      "Lang;", "\u27ea",
      "Laplacetrf;", "\u2112",
      "Larr;", "\u219e",
      "Lcaron;", "\u013d",
      "Lcedil;", "\u013b",
      "Lcy;", "\u041b",
      "LeftAngleBracket;", "\u27e8",
      "LeftArrow;", "\u2190",
      "LeftArrowBar;", "\u21e4",
      "LeftArrowRightArrow;", "\u21c6",
      "LeftCeiling;", "\u2308",
      "LeftDoubleBracket;", "\u27e6",
      "LeftDownTeeVector;", "\u2961",
      "LeftDownVector;", "\u21c3",
      "LeftDownVectorBar;", "\u2959",
      "LeftFloor;", "\u230a",
      "LeftRightArrow;", "\u2194",
      "LeftRightVector;", "\u294e",
      "LeftTee;", "\u22a3",
      "LeftTeeArrow;", "\u21a4",
      "LeftTeeVector;", "\u295a",
      "LeftTriangle;", "\u22b2",
      "LeftTriangleBar;", "\u29cf",
      "LeftTriangleEqual;", "\u22b4",
      "LeftUpDownVector;", "\u2951",
      "LeftUpTeeVector;", "\u2960",
      "LeftUpVector;", "\u21bf",
      "LeftUpVectorBar;", "\u2958",
      "LeftVector;", "\u21bc",
      "LeftVectorBar;", "\u2952",
      "Leftarrow;", "\u21d0",
      "Leftrightarrow;", "\u21d4",
      "LessEqualGreater;", "\u22da",
      "LessFullEqual;", "\u2266",
      "LessGreater;", "\u2276",
      "LessLess;", "\u2aa1",
      "LessSlantEqual;", "\u2a7d",
      "LessTilde;", "\u2272",
      "Lfr;", "\ud835\udd0f",
      "Ll;", "\u22d8",
      "Lleftarrow;", "\u21da",
      "Lmidot;", "\u013f",
      "LongLeftArrow;", "\u27f5",
      "LongLeftRightArrow;", "\u27f7",
      "LongRightArrow;", "\u27f6",
      "Longleftarrow;", "\u27f8",
      "Longleftrightarrow;", "\u27fa",
      "Longrightarrow;", "\u27f9",
      "Lopf;", "\ud835\udd43",
      "LowerLeftArrow;", "\u2199",
      "LowerRightArrow;", "\u2198",
      "Lscr;", "\u2112",
      "Lsh;", "\u21b0",
      "Lstrok;", "\u0141",
      "Lt;", "\u226a",
      "Map;", "\u2905",
      "Mcy;", "\u041c",
      "MediumSpace;", "\u205f",
      "Mellintrf;", "\u2133",
      "Mfr;", "\ud835\udd10",
      "MinusPlus;", "\u2213",
      "Mopf;", "\ud835\udd44",
      "Mscr;", "\u2133",
      "Mu;", "\u039c",
      "NJcy;", "\u040a",
      "Nacute;", "\u0143",
      "Ncaron;", "\u0147",
      "Ncedil;", "\u0145",
      "Ncy;", "\u041d",
      "NegativeMediumSpace;", "\u200b",
      "NegativeThickSpace;", "\u200b",
      "NegativeThinSpace;", "\u200b",
      "NegativeVeryThinSpace;", "\u200b",
      "NestedGreaterGreater;", "\u226b",
      "NestedLessLess;", "\u226a",
      "NewLine;", "\n",
      "Nfr;", "\ud835\udd11",
      "NoBreak;", "\u2060",
      "NonBreakingSpace;", "\u00a0",
      "Nopf;", "\u2115",
      "Not;", "\u2aec",
      "NotCongruent;", "\u2262",
      "NotCupCap;", "\u226d",
      "NotDoubleVerticalBar;", "\u2226",
      "NotElement;", "\u2209",
      "NotEqual;", "\u2260",
      "NotEqualTilde;", "\u2242\u0338",
      "NotExists;", "\u2204",
      "NotGreater;", "\u226f",
      "NotGreaterEqual;", "\u2271",
      "NotGreaterFullEqual;", "\u2267\u0338",
      "NotGreaterGreater;", "\u226b\u0338",
      "NotGreaterLess;", "\u2279",
      "NotGreaterSlantEqual;", "\u2a7e\u0338",
      "NotGreaterTilde;", "\u2275",
      "NotHumpDownHump;", "\u224e\u0338",
      "NotHumpEqual;", "\u224f\u0338",
      "NotLeftTriangle;", "\u22ea",
      "NotLeftTriangleBar;", "\u29cf\u0338",
      "NotLeftTriangleEqual;", "\u22ec",
      "NotLess;", "\u226e",
      "NotLessEqual;", "\u2270",
      "NotLessGreater;", "\u2278",
      "NotLessLess;", "\u226a\u0338",
      "NotLessSlantEqual;", "\u2a7d\u0338",
      "NotLessTilde;", "\u2274",
      "NotNestedGreaterGreater;", "\u2aa2\u0338",
      "NotNestedLessLess;", "\u2aa1\u0338",
      "NotPrecedes;", "\u2280",
      "NotPrecedesEqual;", "\u2aaf\u0338",
      "NotPrecedesSlantEqual;", "\u22e0",
      "NotReverseElement;", "\u220c",
      "NotRightTriangle;", "\u22eb",
      "NotRightTriangleBar;", "\u29d0\u0338",
      "NotRightTriangleEqual;", "\u22ed",
      "NotSquareSubset;", "\u228f\u0338",
      "NotSquareSubsetEqual;", "\u22e2",
      "NotSquareSuperset;", "\u2290\u0338",
      "NotSquareSupersetEqual;", "\u22e3",
      "NotSubset;", "\u2282\u20d2",
      "NotSubsetEqual;", "\u2288",
      "NotSucceeds;", "\u2281",
      "NotSucceedsEqual;", "\u2ab0\u0338",
      "NotSucceedsSlantEqual;", "\u22e1",
      "NotSucceedsTilde;", "\u227f\u0338",
      "NotSuperset;", "\u2283\u20d2",
      "NotSupersetEqual;", "\u2289",
      "NotTilde;", "\u2241",
      "NotTildeEqual;", "\u2244",
      "NotTildeFullEqual;", "\u2247",
      "NotTildeTilde;", "\u2249",
      "NotVerticalBar;", "\u2224",
      "Nscr;", "\ud835\udca9",
      "Ntilde", "\u00d1",
      "Ntilde;", "\u00d1",
      "Nu;", "\u039d",
      "OElig;", "\u0152",
      "Oacute", "\u00d3",
      "Oacute;", "\u00d3",
      "Ocirc", "\u00d4",
      "Ocirc;", "\u00d4",
      "Ocy;", "\u041e",
      "Odblac;", "\u0150",
      "Ofr;", "\ud835\udd12",
      "Ograve", "\u00d2",
      "Ograve;", "\u00d2",
      "Omacr;", "\u014c",
      "Omega;", "\u03a9",
      "Omicron;", "\u039f",
      "Oopf;", "\ud835\udd46",
      "OpenCurlyDoubleQuote;", "\u201c",
      "OpenCurlyQuote;", "\u2018",
      "Or;", "\u2a54",
      "Oscr;", "\ud835\udcaa",
      "Oslash", "\u00d8",
      "Oslash;", "\u00d8",
      "Otilde", "\u00d5",
      "Otilde;", "\u00d5",
      "Otimes;", "\u2a37",
      "Ouml", "\u00d6",
      "Ouml;", "\u00d6",
      "OverBar;", "\u203e",
      "OverBrace;", "\u23de",
      "OverBracket;", "\u23b4",
      "OverParenthesis;", "\u23dc",
      "PartialD;", "\u2202",
      "Pcy;", "\u041f",
      "Pfr;", "\ud835\udd13",
      "Phi;", "\u03a6",
      "Pi;", "\u03a0",
      "PlusMinus;", "\u00b1",
      "Poincareplane;", "\u210c",
      "Popf;", "\u2119",
      "Pr;", "\u2abb",
      "Precedes;", "\u227a",
      "PrecedesEqual;", "\u2aaf",
      "PrecedesSlantEqual;", "\u227c",
      "PrecedesTilde;", "\u227e",
      "Prime;", "\u2033",
      "Product;", "\u220f",
      "Proportion;", "\u2237",
      "Proportional;", "\u221d",
      "Pscr;", "\ud835\udcab",
      "Psi;", "\u03a8",
      "QUOT", "\"",
      "QUOT;", "\"",
      "Qfr;", "\ud835\udd14",
      "Qopf;", "\u211a",
      "Qscr;", "\ud835\udcac",
      "RBarr;", "\u2910",
      "REG", "\u00ae",
      "REG;", "\u00ae",
      "Racute;", "\u0154",
      "Rang;", "\u27eb",
      "Rarr;", "\u21a0",
      "Rarrtl;", "\u2916",
      "Rcaron;", "\u0158",
      "Rcedil;", "\u0156",
      "Rcy;", "\u0420",
      "Re;", "\u211c",
      "ReverseElement;", "\u220b",
      "ReverseEquilibrium;", "\u21cb",
      "ReverseUpEquilibrium;", "\u296f",
      "Rfr;", "\u211c",
      "Rho;", "\u03a1",
      "RightAngleBracket;", "\u27e9",
      "RightArrow;", "\u2192",
      "RightArrowBar;", "\u21e5",
      "RightArrowLeftArrow;", "\u21c4",
      "RightCeiling;", "\u2309",
      "RightDoubleBracket;", "\u27e7",
      "RightDownTeeVector;", "\u295d",
      "RightDownVector;", "\u21c2",
      "RightDownVectorBar;", "\u2955",
      "RightFloor;", "\u230b",
      "RightTee;", "\u22a2",
      "RightTeeArrow;", "\u21a6",
      "RightTeeVector;", "\u295b",
      "RightTriangle;", "\u22b3",
      "RightTriangleBar;", "\u29d0",
      "RightTriangleEqual;", "\u22b5",
      "RightUpDownVector;", "\u294f",
      "RightUpTeeVector;", "\u295c",
      "RightUpVector;", "\u21be",
      "RightUpVectorBar;", "\u2954",
      "RightVector;", "\u21c0",
      "RightVectorBar;", "\u2953",
      "Rightarrow;", "\u21d2",
      "Ropf;", "\u211d",
      "RoundImplies;", "\u2970",
      "Rrightarrow;", "\u21db",
      "Rscr;", "\u211b",
      "Rsh;", "\u21b1",
      "RuleDelayed;", "\u29f4",
      "SHCHcy;", "\u0429",
      "SHcy;", "\u0428",
      "SOFTcy;", "\u042c",
      "Sacute;", "\u015a",
      "Sc;", "\u2abc",
      "Scaron;", "\u0160",
      "Scedil;", "\u015e",
      "Scirc;", "\u015c",
      "Scy;", "\u0421",
      "Sfr;", "\ud835\udd16",
      "ShortDownArrow;", "\u2193",
      "ShortLeftArrow;", "\u2190",
      "ShortRightArrow;", "\u2192",
      "ShortUpArrow;", "\u2191",
      "Sigma;", "\u03a3",
      "SmallCircle;", "\u2218",
      "Sopf;", "\ud835\udd4a",
      "Sqrt;", "\u221a",
      "Square;", "\u25a1",
      "SquareIntersection;", "\u2293",
      "SquareSubset;", "\u228f",
      "SquareSubsetEqual;", "\u2291",
      "SquareSuperset;", "\u2290",
      "SquareSupersetEqual;", "\u2292",
      "SquareUnion;", "\u2294",
      "Sscr;", "\ud835\udcae",
      "Star;", "\u22c6",
      "Sub;", "\u22d0",
      "Subset;", "\u22d0",
      "SubsetEqual;", "\u2286",
      "Succeeds;", "\u227b",
      "SucceedsEqual;", "\u2ab0",
      "SucceedsSlantEqual;", "\u227d",
      "SucceedsTilde;", "\u227f",
      "SuchThat;", "\u220b",
      "Sum;", "\u2211",
      "Sup;", "\u22d1",
      "Superset;", "\u2283",
      "SupersetEqual;", "\u2287",
      "Supset;", "\u22d1",
      "THORN", "\u00de",
      "THORN;", "\u00de",
      "TRADE;", "\u2122",
      "TSHcy;", "\u040b",
      "TScy;", "\u0426",
      "Tab;", "\u0009",
      "Tau;", "\u03a4",
      "Tcaron;", "\u0164",
      "Tcedil;", "\u0162",
      "Tcy;", "\u0422",
      "Tfr;", "\ud835\udd17",
      "Therefore;", "\u2234",
      "Theta;", "\u0398",
      "ThickSpace;", "\u205f\u200a",
      "ThinSpace;", "\u2009",
      "Tilde;", "\u223c",
      "TildeEqual;", "\u2243",
      "TildeFullEqual;", "\u2245",
      "TildeTilde;", "\u2248",
      "Topf;", "\ud835\udd4b",
      "TripleDot;", "\u20db",
      "Tscr;", "\ud835\udcaf",
      "Tstrok;", "\u0166",
      "Uacute", "\u00da",
      "Uacute;", "\u00da",
      "Uarr;", "\u219f",
      "Uarrocir;", "\u2949",
      "Ubrcy;", "\u040e",
      "Ubreve;", "\u016c",
      "Ucirc", "\u00db",
      "Ucirc;", "\u00db",
      "Ucy;", "\u0423",
      "Udblac;", "\u0170",
      "Ufr;", "\ud835\udd18",
      "Ugrave", "\u00d9",
      "Ugrave;", "\u00d9",
      "Umacr;", "\u016a",
      "UnderBar;", "\u005f",
      "UnderBrace;", "\u23df",
      "UnderBracket;", "\u23b5",
      "UnderParenthesis;", "\u23dd",
      "Union;", "\u22c3",
      "UnionPlus;", "\u228e",
      "Uogon;", "\u0172",
      "Uopf;", "\ud835\udd4c",
      "UpArrow;", "\u2191",
      "UpArrowBar;", "\u2912",
      "UpArrowDownArrow;", "\u21c5",
      "UpDownArrow;", "\u2195",
      "UpEquilibrium;", "\u296e",
      "UpTee;", "\u22a5",
      "UpTeeArrow;", "\u21a5",
      "Uparrow;", "\u21d1",
      "Updownarrow;", "\u21d5",
      "UpperLeftArrow;", "\u2196",
      "UpperRightArrow;", "\u2197",
      "Upsi;", "\u03d2",
      "Upsilon;", "\u03a5",
      "Uring;", "\u016e",
      "Uscr;", "\ud835\udcb0",
      "Utilde;", "\u0168",
      "Uuml", "\u00dc",
      "Uuml;", "\u00dc",
      "VDash;", "\u22ab",
      "Vbar;", "\u2aeb",
      "Vcy;", "\u0412",
      "Vdash;", "\u22a9",
      "Vdashl;", "\u2ae6",
      "Vee;", "\u22c1",
      "Verbar;", "\u2016",
      "Vert;", "\u2016",
      "VerticalBar;", "\u2223",
      "VerticalLine;", "\u007c",
      "VerticalSeparator;", "\u2758",
      "VerticalTilde;", "\u2240",
      "VeryThinSpace;", "\u200a",
      "Vfr;", "\ud835\udd19",
      "Vopf;", "\ud835\udd4d",
      "Vscr;", "\ud835\udcb1",
      "Vvdash;", "\u22aa",
      "Wcirc;", "\u0174",
      "Wedge;", "\u22c0",
      "Wfr;", "\ud835\udd1a",
      "Wopf;", "\ud835\udd4e",
      "Wscr;", "\ud835\udcb2",
      "Xfr;", "\ud835\udd1b",
      "Xi;", "\u039e",
      "Xopf;", "\ud835\udd4f",
      "Xscr;", "\ud835\udcb3",
      "YAcy;", "\u042f",
      "YIcy;", "\u0407",
      "YUcy;", "\u042e",
      "Yacute", "\u00dd",
      "Yacute;", "\u00dd",
      "Ycirc;", "\u0176",
      "Ycy;", "\u042b",
      "Yfr;", "\ud835\udd1c",
      "Yopf;", "\ud835\udd50",
      "Yscr;", "\ud835\udcb4",
      "Yuml;", "\u0178",
      "ZHcy;", "\u0416",
      "Zacute;", "\u0179",
      "Zcaron;", "\u017d",
      "Zcy;", "\u0417",
      "Zdot;", "\u017b",
      "ZeroWidthSpace;", "\u200b",
      "Zeta;", "\u0396",
      "Zfr;", "\u2128",
      "Zopf;", "\u2124",
      "Zscr;", "\ud835\udcb5",
      "aacute", "\u00e1",
      "aacute;", "\u00e1",
      "abreve;", "\u0103",
      "ac;", "\u223e",
      "acE;", "\u223e\u0333",
      "acd;", "\u223f",
      "acirc", "\u00e2",
      "acirc;", "\u00e2",
      "acute", "\u00b4",
      "acute;", "\u00b4",
      "acy;", "\u0430",
      "aelig", "\u00e6",
      "aelig;", "\u00e6",
      "af;", "\u2061",
      "afr;", "\ud835\udd1e",
      "agrave", "\u00e0",
      "agrave;", "\u00e0",
      "alefsym;", "\u2135",
      "aleph;", "\u2135",
      "alpha;", "\u03b1",
      "amacr;", "\u0101",
      "amalg;", "\u2a3f",
      "amp", "\u0026",
      "amp;", "\u0026",
      "and;", "\u2227",
      "andand;", "\u2a55",
      "andd;", "\u2a5c",
      "andslope;", "\u2a58",
      "andv;", "\u2a5a",
      "ang;", "\u2220",
      "ange;", "\u29a4",
      "angle;", "\u2220",
      "angmsd;", "\u2221",
      "angmsdaa;", "\u29a8",
      "angmsdab;", "\u29a9",
      "angmsdac;", "\u29aa",
      "angmsdad;", "\u29ab",
      "angmsdae;", "\u29ac",
      "angmsdaf;", "\u29ad",
      "angmsdag;", "\u29ae",
      "angmsdah;", "\u29af",
      "angrt;", "\u221f",
      "angrtvb;", "\u22be",
      "angrtvbd;", "\u299d",
      "angsph;", "\u2222",
      "angst;", "\u00c5",
      "angzarr;", "\u237c",
      "aogon;", "\u0105",
      "aopf;", "\ud835\udd52",
      "ap;", "\u2248",
      "apE;", "\u2a70",
      "apacir;", "\u2a6f",
      "ape;", "\u224a",
      "apid;", "\u224b",
      "apos;", "\u0027",
      "approx;", "\u2248",
      "approxeq;", "\u224a",
      "aring", "\u00e5",
      "aring;", "\u00e5",
      "ascr;", "\ud835\udcb6",
      "ast;", "\u002a",
      "asymp;", "\u2248",
      "asympeq;", "\u224d",
      "atilde", "\u00e3",
      "atilde;", "\u00e3",
      "auml", "\u00e4",
      "auml;", "\u00e4",
      "awconint;", "\u2233",
      "awint;", "\u2a11",
      "bNot;", "\u2aed",
      "backcong;", "\u224c",
      "backepsilon;", "\u03f6",
      "backprime;", "\u2035",
      "backsim;", "\u223d",
      "backsimeq;", "\u22cd",
      "barvee;", "\u22bd",
      "barwed;", "\u2305",
      "barwedge;", "\u2305",
      "bbrk;", "\u23b5",
      "bbrktbrk;", "\u23b6",
      "bcong;", "\u224c",
      "bcy;", "\u0431",
      "bdquo;", "\u201e",
      "becaus;", "\u2235",
      "because;", "\u2235",
      "bemptyv;", "\u29b0",
      "bepsi;", "\u03f6",
      "bernou;", "\u212c",
      "beta;", "\u03b2",
      "beth;", "\u2136",
      "between;", "\u226c",
      "bfr;", "\ud835\udd1f",
      "bigcap;", "\u22c2",
      "bigcirc;", "\u25ef",
      "bigcup;", "\u22c3",
      "bigodot;", "\u2a00",
      "bigoplus;", "\u2a01",
      "bigotimes;", "\u2a02",
      "bigsqcup;", "\u2a06",
      "bigstar;", "\u2605",
      "bigtriangledown;", "\u25bd",
      "bigtriangleup;", "\u25b3",
      "biguplus;", "\u2a04",
      "bigvee;", "\u22c1",
      "bigwedge;", "\u22c0",
      "bkarow;", "\u290d",
      "blacklozenge;", "\u29eb",
      "blacksquare;", "\u25aa",
      "blacktriangle;", "\u25b4",
      "blacktriangledown;", "\u25be",
      "blacktriangleleft;", "\u25c2",
      "blacktriangleright;", "\u25b8",
      "blank;", "\u2423",
      "blk12;", "\u2592",
      "blk14;", "\u2591",
      "blk34;", "\u2593",
      "block;", "\u2588",
      "bne;", "\u003d\u20e5",
      "bnequiv;", "\u2261\u20e5",
      "bnot;", "\u2310",
      "bopf;", "\ud835\udd53",
      "bot;", "\u22a5",
      "bottom;", "\u22a5",
      "bowtie;", "\u22c8",
      "boxDL;", "\u2557",
      "boxDR;", "\u2554",
      "boxDl;", "\u2556",
      "boxDr;", "\u2553",
      "boxH;", "\u2550",
      "boxHD;", "\u2566",
      "boxHU;", "\u2569",
      "boxHd;", "\u2564",
      "boxHu;", "\u2567",
      "boxUL;", "\u255d",
      "boxUR;", "\u255a",
      "boxUl;", "\u255c",
      "boxUr;", "\u2559",
      "boxV;", "\u2551",
      "boxVH;", "\u256c",
      "boxVL;", "\u2563",
      "boxVR;", "\u2560",
      "boxVh;", "\u256b",
      "boxVl;", "\u2562",
      "boxVr;", "\u255f",
      "boxbox;", "\u29c9",
      "boxdL;", "\u2555",
      "boxdR;", "\u2552",
      "boxdl;", "\u2510",
      "boxdr;", "\u250c",
      "boxh;", "\u2500",
      "boxhD;", "\u2565",
      "boxhU;", "\u2568",
      "boxhd;", "\u252c",
      "boxhu;", "\u2534",
      "boxminus;", "\u229f",
      "boxplus;", "\u229e",
      "boxtimes;", "\u22a0",
      "boxuL;", "\u255b",
      "boxuR;", "\u2558",
      "boxul;", "\u2518",
      "boxur;", "\u2514",
      "boxv;", "\u2502",
      "boxvH;", "\u256a",
      "boxvL;", "\u2561",
      "boxvR;", "\u255e",
      "boxvh;", "\u253c",
      "boxvl;", "\u2524",
      "boxvr;", "\u251c",
      "bprime;", "\u2035",
      "breve;", "\u02d8",
      "brvbar", "\u00a6",
      "brvbar;", "\u00a6",
      "bscr;", "\ud835\udcb7",
      "bsemi;", "\u204f",
      "bsim;", "\u223d",
      "bsime;", "\u22cd",
      "bsol;", "\\",
      "bsolb;", "\u29c5",
      "bsolhsub;", "\u27c8",
      "bull;", "\u2022",
      "bullet;", "\u2022",
      "bump;", "\u224e",
      "bumpE;", "\u2aae",
      "bumpe;", "\u224f",
      "bumpeq;", "\u224f",
      "cacute;", "\u0107",
      "cap;", "\u2229",
      "capand;", "\u2a44",
      "capbrcup;", "\u2a49",
      "capcap;", "\u2a4b",
      "capcup;", "\u2a47",
      "capdot;", "\u2a40",
      "caps;", "\u2229\ufe00",
      "caret;", "\u2041",
      "caron;", "\u02c7",
      "ccaps;", "\u2a4d",
      "ccaron;", "\u010d",
      "ccedil", "\u00e7",
      "ccedil;", "\u00e7",
      "ccirc;", "\u0109",
      "ccups;", "\u2a4c",
      "ccupssm;", "\u2a50",
      "cdot;", "\u010b",
      "cedil", "\u00b8",
      "cedil;", "\u00b8",
      "cemptyv;", "\u29b2",
      "cent", "\u00a2",
      "cent;", "\u00a2",
      "centerdot;", "\u00b7",
      "cfr;", "\ud835\udd20",
      "chcy;", "\u0447",
      "check;", "\u2713",
      "checkmark;", "\u2713",
      "chi;", "\u03c7",
      "cir;", "\u25cb",
      "cirE;", "\u29c3",
      "circ;", "\u02c6",
      "circeq;", "\u2257",
      "circlearrowleft;", "\u21ba",
      "circlearrowright;", "\u21bb",
      "circledR;", "\u00ae",
      "circledS;", "\u24c8",
      "circledast;", "\u229b",
      "circledcirc;", "\u229a",
      "circleddash;", "\u229d",
      "cire;", "\u2257",
      "cirfnint;", "\u2a10",
      "cirmid;", "\u2aef",
      "cirscir;", "\u29c2",
      "clubs;", "\u2663",
      "clubsuit;", "\u2663",
      "colon;", "\u003a",
      "colone;", "\u2254",
      "coloneq;", "\u2254",
      "comma;", "\u002c",
      "commat;", "\u0040",
      "comp;", "\u2201",
      "compfn;", "\u2218",
      "complement;", "\u2201",
      "complexes;", "\u2102",
      "cong;", "\u2245",
      "congdot;", "\u2a6d",
      "conint;", "\u222e",
      "copf;", "\ud835\udd54",
      "coprod;", "\u2210",
      "copy", "\u00a9",
      "copy;", "\u00a9",
      "copysr;", "\u2117",
      "crarr;", "\u21b5",
      "cross;", "\u2717",
      "cscr;", "\ud835\udcb8",
      "csub;", "\u2acf",
      "csube;", "\u2ad1",
      "csup;", "\u2ad0",
      "csupe;", "\u2ad2",
      "ctdot;", "\u22ef",
      "cudarrl;", "\u2938",
      "cudarrr;", "\u2935",
      "cuepr;", "\u22de",
      "cuesc;", "\u22df",
      "cularr;", "\u21b6",
      "cularrp;", "\u293d",
      "cup;", "\u222a",
      "cupbrcap;", "\u2a48",
      "cupcap;", "\u2a46",
      "cupcup;", "\u2a4a",
      "cupdot;", "\u228d",
      "cupor;", "\u2a45",
      "cups;", "\u222a\ufe00",
      "curarr;", "\u21b7",
      "curarrm;", "\u293c",
      "curlyeqprec;", "\u22de",
      "curlyeqsucc;", "\u22df",
      "curlyvee;", "\u22ce",
      "curlywedge;", "\u22cf",
      "curren", "\u00a4",
      "curren;", "\u00a4",
      "curvearrowleft;", "\u21b6",
      "curvearrowright;", "\u21b7",
      "cuvee;", "\u22ce",
      "cuwed;", "\u22cf",
      "cwconint;", "\u2232",
      "cwint;", "\u2231",
      "cylcty;", "\u232d",
      "dArr;", "\u21d3",
      "dHar;", "\u2965",
      "dagger;", "\u2020",
      "daleth;", "\u2138",
      "darr;", "\u2193",
      "dash;", "\u2010",
      "dashv;", "\u22a3",
      "dbkarow;", "\u290f",
      "dblac;", "\u02dd",
      "dcaron;", "\u010f",
      "dcy;", "\u0434",
      "dd;", "\u2146",
      "ddagger;", "\u2021",
      "ddarr;", "\u21ca",
      "ddotseq;", "\u2a77",
      "deg", "\u00b0",
      "deg;", "\u00b0",
      "delta;", "\u03b4",
      "demptyv;", "\u29b1",
      "dfisht;", "\u297f",
      "dfr;", "\ud835\udd21",
      "dharl;", "\u21c3",
      "dharr;", "\u21c2",
      "diam;", "\u22c4",
      "diamond;", "\u22c4",
      "diamondsuit;", "\u2666",
      "diams;", "\u2666",
      "die;", "\u00a8",
      "digamma;", "\u03dd",
      "disin;", "\u22f2",
      "div;", "\u00f7",
      "divide", "\u00f7",
      "divide;", "\u00f7",
      "divideontimes;", "\u22c7",
      "divonx;", "\u22c7",
      "djcy;", "\u0452",
      "dlcorn;", "\u231e",
      "dlcrop;", "\u230d",
      "dollar;", "\u0024",
      "dopf;", "\ud835\udd55",
      "dot;", "\u02d9",
      "doteq;", "\u2250",
      "doteqdot;", "\u2251",
      "dotminus;", "\u2238",
      "dotplus;", "\u2214",
      "dotsquare;", "\u22a1",
      "doublebarwedge;", "\u2306",
      "downarrow;", "\u2193",
      "downdownarrows;", "\u21ca",
      "downharpoonleft;", "\u21c3",
      "downharpoonright;", "\u21c2",
      "drbkarow;", "\u2910",
      "drcorn;", "\u231f",
      "drcrop;", "\u230c",
      "dscr;", "\ud835\udcb9",
      "dscy;", "\u0455",
      "dsol;", "\u29f6",
      "dstrok;", "\u0111",
      "dtdot;", "\u22f1",
      "dtri;", "\u25bf",
      "dtrif;", "\u25be",
      "duarr;", "\u21f5",
      "duhar;", "\u296f",
      "dwangle;", "\u29a6",
      "dzcy;", "\u045f",
      "dzigrarr;", "\u27ff",
      "eDDot;", "\u2a77",
      "eDot;", "\u2251",
      "eacute", "\u00e9",
      "eacute;", "\u00e9",
      "easter;", "\u2a6e",
      "ecaron;", "\u011b",
      "ecir;", "\u2256",
      "ecirc", "\u00ea",
      "ecirc;", "\u00ea",
      "ecolon;", "\u2255",
      "ecy;", "\u044d",
      "edot;", "\u0117",
      "ee;", "\u2147",
      "efDot;", "\u2252",
      "efr;", "\ud835\udd22",
      "eg;", "\u2a9a",
      "egrave", "\u00e8",
      "egrave;", "\u00e8",
      "egs;", "\u2a96",
      "egsdot;", "\u2a98",
      "el;", "\u2a99",
      "elinters;", "\u23e7",
      "ell;", "\u2113",
      "els;", "\u2a95",
      "elsdot;", "\u2a97",
      "emacr;", "\u0113",
      "empty;", "\u2205",
      "emptyset;", "\u2205",
      "emptyv;", "\u2205",
      "emsp13;", "\u2004",
      "emsp14;", "\u2005",
      "emsp;", "\u2003",
      "eng;", "\u014b",
      "ensp;", "\u2002",
      "eogon;", "\u0119",
      "eopf;", "\ud835\udd56",
      "epar;", "\u22d5",
      "eparsl;", "\u29e3",
      "eplus;", "\u2a71",
      "epsi;", "\u03b5",
      "epsilon;", "\u03b5",
      "epsiv;", "\u03f5",
      "eqcirc;", "\u2256",
      "eqcolon;", "\u2255",
      "eqsim;", "\u2242",
      "eqslantgtr;", "\u2a96",
      "eqslantless;", "\u2a95",
      "equals;", "\u003d",
      "equest;", "\u225f",
      "equiv;", "\u2261",
      "equivDD;", "\u2a78",
      "eqvparsl;", "\u29e5",
      "erDot;", "\u2253",
      "erarr;", "\u2971",
      "escr;", "\u212f",
      "esdot;", "\u2250",
      "esim;", "\u2242",
      "eta;", "\u03b7",
      "eth", "\u00f0",
      "eth;", "\u00f0",
      "euml", "\u00eb",
      "euml;", "\u00eb",
      "euro;", "\u20ac",
      "excl;", "\u0021",
      "exist;", "\u2203",
      "expectation;", "\u2130",
      "exponentiale;", "\u2147",
      "fallingdotseq;", "\u2252",
      "fcy;", "\u0444",
      "female;", "\u2640",
      "ffilig;", "\ufb03",
      "fflig;", "\ufb00",
      "ffllig;", "\ufb04",
      "ffr;", "\ud835\udd23",
      "filig;", "\ufb01",
      "fjlig;", "\u0066\u006a",
      "flat;", "\u266d",
      "fllig;", "\ufb02",
      "fltns;", "\u25b1",
      "fnof;", "\u0192",
      "fopf;", "\ud835\udd57",
      "forall;", "\u2200",
      "fork;", "\u22d4",
      "forkv;", "\u2ad9",
      "fpartint;", "\u2a0d",
      "frac12", "\u00bd",
      "frac12;", "\u00bd",
      "frac13;", "\u2153",
      "frac14", "\u00bc",
      "frac14;", "\u00bc",
      "frac15;", "\u2155",
      "frac16;", "\u2159",
      "frac18;", "\u215b",
      "frac23;", "\u2154",
      "frac25;", "\u2156",
      "frac34", "\u00be",
      "frac34;", "\u00be",
      "frac35;", "\u2157",
      "frac38;", "\u215c",
      "frac45;", "\u2158",
      "frac56;", "\u215a",
      "frac58;", "\u215d",
      "frac78;", "\u215e",
      "frasl;", "\u2044",
      "frown;", "\u2322",
      "fscr;", "\ud835\udcbb",
      "gE;", "\u2267",
      "gEl;", "\u2a8c",
      "gacute;", "\u01f5",
      "gamma;", "\u03b3",
      "gammad;", "\u03dd",
      "gap;", "\u2a86",
      "gbreve;", "\u011f",
      "gcirc;", "\u011d",
      "gcy;", "\u0433",
      "gdot;", "\u0121",
      "ge;", "\u2265",
      "gel;", "\u22db",
      "geq;", "\u2265",
      "geqq;", "\u2267",
      "geqslant;", "\u2a7e",
      "ges;", "\u2a7e",
      "gescc;", "\u2aa9",
      "gesdot;", "\u2a80",
      "gesdoto;", "\u2a82",
      "gesdotol;", "\u2a84",
      "gesl;", "\u22db\ufe00",
      "gesles;", "\u2a94",
      "gfr;", "\ud835\udd24",
      "gg;", "\u226b",
      "ggg;", "\u22d9",
      "gimel;", "\u2137",
      "gjcy;", "\u0453",
      "gl;", "\u2277",
      "glE;", "\u2a92",
      "gla;", "\u2aa5",
      "glj;", "\u2aa4",
      "gnE;", "\u2269",
      "gnap;", "\u2a8a",
      "gnapprox;", "\u2a8a",
      "gne;", "\u2a88",
      "gneq;", "\u2a88",
      "gneqq;", "\u2269",
      "gnsim;", "\u22e7",
      "gopf;", "\ud835\udd58",
      "grave;", "\u0060",
      "gscr;", "\u210a",
      "gsim;", "\u2273",
      "gsime;", "\u2a8e",
      "gsiml;", "\u2a90",
      "gt", "\u003e",
      "gt;", "\u003e",
      "gtcc;", "\u2aa7",
      "gtcir;", "\u2a7a",
      "gtdot;", "\u22d7",
      "gtlPar;", "\u2995",
      "gtquest;", "\u2a7c",
      "gtrapprox;", "\u2a86",
      "gtrarr;", "\u2978",
      "gtrdot;", "\u22d7",
      "gtreqless;", "\u22db",
      "gtreqqless;", "\u2a8c",
      "gtrless;", "\u2277",
      "gtrsim;", "\u2273",
      "gvertneqq;", "\u2269\ufe00",
      "gvnE;", "\u2269\ufe00",
      "hArr;", "\u21d4",
      "hairsp;", "\u200a",
      "half;", "\u00bd",
      "hamilt;", "\u210b",
      "hardcy;", "\u044a",
      "harr;", "\u2194",
      "harrcir;", "\u2948",
      "harrw;", "\u21ad",
      "hbar;", "\u210f",
      "hcirc;", "\u0125",
      "hearts;", "\u2665",
      "heartsuit;", "\u2665",
      "hellip;", "\u2026",
      "hercon;", "\u22b9",
      "hfr;", "\ud835\udd25",
      "hksearow;", "\u2925",
      "hkswarow;", "\u2926",
      "hoarr;", "\u21ff",
      "homtht;", "\u223b",
      "hookleftarrow;", "\u21a9",
      "hookrightarrow;", "\u21aa",
      "hopf;", "\ud835\udd59",
      "horbar;", "\u2015",
      "hscr;", "\ud835\udcbd",
      "hslash;", "\u210f",
      "hstrok;", "\u0127",
      "hybull;", "\u2043",
      "hyphen;", "\u2010",
      "iacute", "\u00ed",
      "iacute;", "\u00ed",
      "ic;", "\u2063",
      "icirc", "\u00ee",
      "icirc;", "\u00ee",
      "icy;", "\u0438",
      "iecy;", "\u0435",
      "iexcl", "\u00a1",
      "iexcl;", "\u00a1",
      "iff;", "\u21d4",
      "ifr;", "\ud835\udd26",
      "igrave", "\u00ec",
      "igrave;", "\u00ec",
      "ii;", "\u2148",
      "iiiint;", "\u2a0c",
      "iiint;", "\u222d",
      "iinfin;", "\u29dc",
      "iiota;", "\u2129",
      "ijlig;", "\u0133",
      "imacr;", "\u012b",
      "image;", "\u2111",
      "imagline;", "\u2110",
      "imagpart;", "\u2111",
      "imath;", "\u0131",
      "imof;", "\u22b7",
      "imped;", "\u01b5",
      "in;", "\u2208",
      "incare;", "\u2105",
      "infin;", "\u221e",
      "infintie;", "\u29dd",
      "inodot;", "\u0131",
      "int;", "\u222b",
      "intcal;", "\u22ba",
      "integers;", "\u2124",
      "intercal;", "\u22ba",
      "intlarhk;", "\u2a17",
      "intprod;", "\u2a3c",
      "iocy;", "\u0451",
      "iogon;", "\u012f",
      "iopf;", "\ud835\udd5a",
      "iota;", "\u03b9",
      "iprod;", "\u2a3c",
      "iquest", "\u00bf",
      "iquest;", "\u00bf",
      "iscr;", "\ud835\udcbe",
      "isin;", "\u2208",
      "isinE;", "\u22f9",
      "isindot;", "\u22f5",
      "isins;", "\u22f4",
      "isinsv;", "\u22f3",
      "isinv;", "\u2208",
      "it;", "\u2062",
      "itilde;", "\u0129",
      "iukcy;", "\u0456",
      "iuml", "\u00ef",
      "iuml;", "\u00ef",
      "jcirc;", "\u0135",
      "jcy;", "\u0439",
      "jfr;", "\ud835\udd27",
      "jmath;", "\u0237",
      "jopf;", "\ud835\udd5b",
      "jscr;", "\ud835\udcbf",
      "jsercy;", "\u0458",
      "jukcy;", "\u0454",
      "kappa;", "\u03ba",
      "kappav;", "\u03f0",
      "kcedil;", "\u0137",
      "kcy;", "\u043a",
      "kfr;", "\ud835\udd28",
      "kgreen;", "\u0138",
      "khcy;", "\u0445",
      "kjcy;", "\u045c",
      "kopf;", "\ud835\udd5c",
      "kscr;", "\ud835\udcc0",
      "lAarr;", "\u21da",
      "lArr;", "\u21d0",
      "lAtail;", "\u291b",
      "lBarr;", "\u290e",
      "lE;", "\u2266",
      "lEg;", "\u2a8b",
      "lHar;", "\u2962",
      "lacute;", "\u013a",
      "laemptyv;", "\u29b4",
      "lagran;", "\u2112",
      "lambda;", "\u03bb",
      "lang;", "\u27e8",
      "langd;", "\u2991",
      "langle;", "\u27e8",
      "lap;", "\u2a85",
      "laquo", "\u00ab",
      "laquo;", "\u00ab",
      "larr;", "\u2190",
      "larrb;", "\u21e4",
      "larrbfs;", "\u291f",
      "larrfs;", "\u291d",
      "larrhk;", "\u21a9",
      "larrlp;", "\u21ab",
      "larrpl;", "\u2939",
      "larrsim;", "\u2973",
      "larrtl;", "\u21a2",
      "lat;", "\u2aab",
      "latail;", "\u2919",
      "late;", "\u2aad",
      "lates;", "\u2aad\ufe00",
      "lbarr;", "\u290c",
      "lbbrk;", "\u2772",
      "lbrace;", "\u007b",
      "lbrack;", "\u005b",
      "lbrke;", "\u298b",
      "lbrksld;", "\u298f",
      "lbrkslu;", "\u298d",
      "lcaron;", "\u013e",
      "lcedil;", "\u013c",
      "lceil;", "\u2308",
      "lcub;", "\u007b",
      "lcy;", "\u043b",
      "ldca;", "\u2936",
      "ldquo;", "\u201c",
      "ldquor;", "\u201e",
      "ldrdhar;", "\u2967",
      "ldrushar;", "\u294b",
      "ldsh;", "\u21b2",
      "le;", "\u2264",
      "leftarrow;", "\u2190",
      "leftarrowtail;", "\u21a2",
      "leftharpoondown;", "\u21bd",
      "leftharpoonup;", "\u21bc",
      "leftleftarrows;", "\u21c7",
      "leftrightarrow;", "\u2194",
      "leftrightarrows;", "\u21c6",
      "leftrightharpoons;", "\u21cb",
      "leftrightsquigarrow;", "\u21ad",
      "leftthreetimes;", "\u22cb",
      "leg;", "\u22da",
      "leq;", "\u2264",
      "leqq;", "\u2266",
      "leqslant;", "\u2a7d",
      "les;", "\u2a7d",
      "lescc;", "\u2aa8",
      "lesdot;", "\u2a7f",
      "lesdoto;", "\u2a81",
      "lesdotor;", "\u2a83",
      "lesg;", "\u22da\ufe00",
      "lesges;", "\u2a93",
      "lessapprox;", "\u2a85",
      "lessdot;", "\u22d6",
      "lesseqgtr;", "\u22da",
      "lesseqqgtr;", "\u2a8b",
      "lessgtr;", "\u2276",
      "lesssim;", "\u2272",
      "lfisht;", "\u297c",
      "lfloor;", "\u230a",
      "lfr;", "\ud835\udd29",
      "lg;", "\u2276",
      "lgE;", "\u2a91",
      "lhard;", "\u21bd",
      "lharu;", "\u21bc",
      "lharul;", "\u296a",
      "lhblk;", "\u2584",
      "ljcy;", "\u0459",
      "ll;", "\u226a",
      "llarr;", "\u21c7",
      "llcorner;", "\u231e",
      "llhard;", "\u296b",
      "lltri;", "\u25fa",
      "lmidot;", "\u0140",
      "lmoust;", "\u23b0",
      "lmoustache;", "\u23b0",
      "lnE;", "\u2268",
      "lnap;", "\u2a89",
      "lnapprox;", "\u2a89",
      "lne;", "\u2a87",
      "lneq;", "\u2a87",
      "lneqq;", "\u2268",
      "lnsim;", "\u22e6",
      "loang;", "\u27ec",
      "loarr;", "\u21fd",
      "lobrk;", "\u27e6",
      "longleftarrow;", "\u27f5",
      "longleftrightarrow;", "\u27f7",
      "longmapsto;", "\u27fc",
      "longrightarrow;", "\u27f6",
      "looparrowleft;", "\u21ab",
      "looparrowright;", "\u21ac",
      "lopar;", "\u2985",
      "lopf;", "\ud835\udd5d",
      "loplus;", "\u2a2d",
      "lotimes;", "\u2a34",
      "lowast;", "\u2217",
      "lowbar;", "\u005f",
      "loz;", "\u25ca",
      "lozenge;", "\u25ca",
      "lozf;", "\u29eb",
      "lpar;", "\u0028",
      "lparlt;", "\u2993",
      "lrarr;", "\u21c6",
      "lrcorner;", "\u231f",
      "lrhar;", "\u21cb",
      "lrhard;", "\u296d",
      "lrm;", "\u200e",
      "lrtri;", "\u22bf",
      "lsaquo;", "\u2039",
      "lscr;", "\ud835\udcc1",
      "lsh;", "\u21b0",
      "lsim;", "\u2272",
      "lsime;", "\u2a8d",
      "lsimg;", "\u2a8f",
      "lsqb;", "\u005b",
      "lsquo;", "\u2018",
      "lsquor;", "\u201a",
      "lstrok;", "\u0142",
      "lt", "\u003c",
      "lt;", "\u003c",
      "ltcc;", "\u2aa6",
      "ltcir;", "\u2a79",
      "ltdot;", "\u22d6",
      "lthree;", "\u22cb",
      "ltimes;", "\u22c9",
      "ltlarr;", "\u2976",
      "ltquest;", "\u2a7b",
      "ltrPar;", "\u2996",
      "ltri;", "\u25c3",
      "ltrie;", "\u22b4",
      "ltrif;", "\u25c2",
      "lurdshar;", "\u294a",
      "luruhar;", "\u2966",
      "lvertneqq;", "\u2268\ufe00",
      "lvnE;", "\u2268\ufe00",
      "mDDot;", "\u223a",
      "macr", "\u00af",
      "macr;", "\u00af",
      "male;", "\u2642",
      "malt;", "\u2720",
      "maltese;", "\u2720",
      "map;", "\u21a6",
      "mapsto;", "\u21a6",
      "mapstodown;", "\u21a7",
      "mapstoleft;", "\u21a4",
      "mapstoup;", "\u21a5",
      "marker;", "\u25ae",
      "mcomma;", "\u2a29",
      "mcy;", "\u043c",
      "mdash;", "\u2014",
      "measuredangle;", "\u2221",
      "mfr;", "\ud835\udd2a",
      "mho;", "\u2127",
      "micro", "\u00b5",
      "micro;", "\u00b5",
      "mid;", "\u2223",
      "midast;", "\u002a",
      "midcir;", "\u2af0",
      "middot", "\u00b7",
      "middot;", "\u00b7",
      "minus;", "\u2212",
      "minusb;", "\u229f",
      "minusd;", "\u2238",
      "minusdu;", "\u2a2a",
      "mlcp;", "\u2adb",
      "mldr;", "\u2026",
      "mnplus;", "\u2213",
      "models;", "\u22a7",
      "mopf;", "\ud835\udd5e",
      "mp;", "\u2213",
      "mscr;", "\ud835\udcc2",
      "mstpos;", "\u223e",
      "mu;", "\u03bc",
      "multimap;", "\u22b8",
      "mumap;", "\u22b8",
      "nGg;", "\u22d9\u0338",
      "nGt;", "\u226b\u20d2",
      "nGtv;", "\u226b\u0338",
      "nLeftarrow;", "\u21cd",
      "nLeftrightarrow;", "\u21ce",
      "nLl;", "\u22d8\u0338",
      "nLt;", "\u226a\u20d2",
      "nLtv;", "\u226a\u0338",
      "nRightarrow;", "\u21cf",
      "nVDash;", "\u22af",
      "nVdash;", "\u22ae",
      "nabla;", "\u2207",
      "nacute;", "\u0144",
      "nang;", "\u2220\u20d2",
      "nap;", "\u2249",
      "napE;", "\u2a70\u0338",
      "napid;", "\u224b\u0338",
      "napos;", "\u0149",
      "napprox;", "\u2249",
      "natur;", "\u266e",
      "natural;", "\u266e",
      "naturals;", "\u2115",
      "nbsp", "\u00a0",
      "nbsp;", "\u00a0",
      "nbump;", "\u224e\u0338",
      "nbumpe;", "\u224f\u0338",
      "ncap;", "\u2a43",
      "ncaron;", "\u0148",
      "ncedil;", "\u0146",
      "ncong;", "\u2247",
      "ncongdot;", "\u2a6d\u0338",
      "ncup;", "\u2a42",
      "ncy;", "\u043d",
      "ndash;", "\u2013",
      "ne;", "\u2260",
      "neArr;", "\u21d7",
      "nearhk;", "\u2924",
      "nearr;", "\u2197",
      "nearrow;", "\u2197",
      "nedot;", "\u2250\u0338",
      "nequiv;", "\u2262",
      "nesear;", "\u2928",
      "nesim;", "\u2242\u0338",
      "nexist;", "\u2204",
      "nexists;", "\u2204",
      "nfr;", "\ud835\udd2b",
      "ngE;", "\u2267\u0338",
      "nge;", "\u2271",
      "ngeq;", "\u2271",
      "ngeqq;", "\u2267\u0338",
      "ngeqslant;", "\u2a7e\u0338",
      "nges;", "\u2a7e\u0338",
      "ngsim;", "\u2275",
      "ngt;", "\u226f",
      "ngtr;", "\u226f",
      "nhArr;", "\u21ce",
      "nharr;", "\u21ae",
      "nhpar;", "\u2af2",
      "ni;", "\u220b",
      "nis;", "\u22fc",
      "nisd;", "\u22fa",
      "niv;", "\u220b",
      "njcy;", "\u045a",
      "nlArr;", "\u21cd",
      "nlE;", "\u2266\u0338",
      "nlarr;", "\u219a",
      "nldr;", "\u2025",
      "nle;", "\u2270",
      "nleftarrow;", "\u219a",
      "nleftrightarrow;", "\u21ae",
      "nleq;", "\u2270",
      "nleqq;", "\u2266\u0338",
      "nleqslant;", "\u2a7d\u0338",
      "nles;", "\u2a7d\u0338",
      "nless;", "\u226e",
      "nlsim;", "\u2274",
      "nlt;", "\u226e",
      "nltri;", "\u22ea",
      "nltrie;", "\u22ec",
      "nmid;", "\u2224",
      "nopf;", "\ud835\udd5f",
      "not", "\u00ac",
      "not;", "\u00ac",
      "notin;", "\u2209",
      "notinE;", "\u22f9\u0338",
      "notindot;", "\u22f5\u0338",
      "notinva;", "\u2209",
      "notinvb;", "\u22f7",
      "notinvc;", "\u22f6",
      "notni;", "\u220c",
      "notniva;", "\u220c",
      "notnivb;", "\u22fe",
      "notnivc;", "\u22fd",
      "npar;", "\u2226",
      "nparallel;", "\u2226",
      "nparsl;", "\u2afd\u20e5",
      "npart;", "\u2202\u0338",
      "npolint;", "\u2a14",
      "npr;", "\u2280",
      "nprcue;", "\u22e0",
      "npre;", "\u2aaf\u0338",
      "nprec;", "\u2280",
      "npreceq;", "\u2aaf\u0338",
      "nrArr;", "\u21cf",
      "nrarr;", "\u219b",
      "nrarrc;", "\u2933\u0338",
      "nrarrw;", "\u219d\u0338",
      "nrightarrow;", "\u219b",
      "nrtri;", "\u22eb",
      "nrtrie;", "\u22ed",
      "nsc;", "\u2281",
      "nsccue;", "\u22e1",
      "nsce;", "\u2ab0\u0338",
      "nscr;", "\ud835\udcc3",
      "nshortmid;", "\u2224",
      "nshortparallel;", "\u2226",
      "nsim;", "\u2241",
      "nsime;", "\u2244",
      "nsimeq;", "\u2244",
      "nsmid;", "\u2224",
      "nspar;", "\u2226",
      "nsqsube;", "\u22e2",
      "nsqsupe;", "\u22e3",
      "nsub;", "\u2284",
      "nsubE;", "\u2ac5\u0338",
      "nsube;", "\u2288",
      "nsubset;", "\u2282\u20d2",
      "nsubseteq;", "\u2288",
      "nsubseteqq;", "\u2ac5\u0338",
      "nsucc;", "\u2281",
      "nsucceq;", "\u2ab0\u0338",
      "nsup;", "\u2285",
      "nsupE;", "\u2ac6\u0338",
      "nsupe;", "\u2289",
      "nsupset;", "\u2283\u20d2",
      "nsupseteq;", "\u2289",
      "nsupseteqq;", "\u2ac6\u0338",
      "ntgl;", "\u2279",
      "ntilde", "\u00f1",
      "ntilde;", "\u00f1",
      "ntlg;", "\u2278",
      "ntriangleleft;", "\u22ea",
      "ntrianglelefteq;", "\u22ec",
      "ntriangleright;", "\u22eb",
      "ntrianglerighteq;", "\u22ed",
      "nu;", "\u03bd",
      "num;", "\u0023",
      "numero;", "\u2116",
      "numsp;", "\u2007",
      "nvDash;", "\u22ad",
      "nvHarr;", "\u2904",
      "nvap;", "\u224d\u20d2",
      "nvdash;", "\u22ac",
      "nvge;", "\u2265\u20d2",
      "nvgt;", "\u003e\u20d2",
      "nvinfin;", "\u29de",
      "nvlArr;", "\u2902",
      "nvle;", "\u2264\u20d2",
      "nvlt;", "\u003c\u20d2",
      "nvltrie;", "\u22b4\u20d2",
      "nvrArr;", "\u2903",
      "nvrtrie;", "\u22b5\u20d2",
      "nvsim;", "\u223c\u20d2",
      "nwArr;", "\u21d6",
      "nwarhk;", "\u2923",
      "nwarr;", "\u2196",
      "nwarrow;", "\u2196",
      "nwnear;", "\u2927",
      "oS;", "\u24c8",
      "oacute", "\u00f3",
      "oacute;", "\u00f3",
      "oast;", "\u229b",
      "ocir;", "\u229a",
      "ocirc", "\u00f4",
      "ocirc;", "\u00f4",
      "ocy;", "\u043e",
      "odash;", "\u229d",
      "odblac;", "\u0151",
      "odiv;", "\u2a38",
      "odot;", "\u2299",
      "odsold;", "\u29bc",
      "oelig;", "\u0153",
      "ofcir;", "\u29bf",
      "ofr;", "\ud835\udd2c",
      "ogon;", "\u02db",
      "ograve", "\u00f2",
      "ograve;", "\u00f2",
      "ogt;", "\u29c1",
      "ohbar;", "\u29b5",
      "ohm;", "\u03a9",
      "oint;", "\u222e",
      "olarr;", "\u21ba",
      "olcir;", "\u29be",
      "olcross;", "\u29bb",
      "oline;", "\u203e",
      "olt;", "\u29c0",
      "omacr;", "\u014d",
      "omega;", "\u03c9",
      "omicron;", "\u03bf",
      "omid;", "\u29b6",
      "ominus;", "\u2296",
      "oopf;", "\ud835\udd60",
      "opar;", "\u29b7",
      "operp;", "\u29b9",
      "oplus;", "\u2295",
      "or;", "\u2228",
      "orarr;", "\u21bb",
      "ord;", "\u2a5d",
      "order;", "\u2134",
      "orderof;", "\u2134",
      "ordf", "\u00aa",
      "ordf;", "\u00aa",
      "ordm", "\u00ba",
      "ordm;", "\u00ba",
      "origof;", "\u22b6",
      "oror;", "\u2a56",
      "orslope;", "\u2a57",
      "orv;", "\u2a5b",
      "oscr;", "\u2134",
      "oslash", "\u00f8",
      "oslash;", "\u00f8",
      "osol;", "\u2298",
      "otilde", "\u00f5",
      "otilde;", "\u00f5",
      "otimes;", "\u2297",
      "otimesas;", "\u2a36",
      "ouml", "\u00f6",
      "ouml;", "\u00f6",
      "ovbar;", "\u233d",
      "par;", "\u2225",
      "para", "\u00b6",
      "para;", "\u00b6",
      "parallel;", "\u2225",
      "parsim;", "\u2af3",
      "parsl;", "\u2afd",
      "part;", "\u2202",
      "pcy;", "\u043f",
      "percnt;", "\u0025",
      "period;", "\u002e",
      "permil;", "\u2030",
      "perp;", "\u22a5",
      "pertenk;", "\u2031",
      "pfr;", "\ud835\udd2d",
      "phi;", "\u03c6",
      "phiv;", "\u03d5",
      "phmmat;", "\u2133",
      "phone;", "\u260e",
      "pi;", "\u03c0",
      "pitchfork;", "\u22d4",
      "piv;", "\u03d6",
      "planck;", "\u210f",
      "planckh;", "\u210e",
      "plankv;", "\u210f",
      "plus;", "\u002b",
      "plusacir;", "\u2a23",
      "plusb;", "\u229e",
      "pluscir;", "\u2a22",
      "plusdo;", "\u2214",
      "plusdu;", "\u2a25",
      "pluse;", "\u2a72",
      "plusmn", "\u00b1",
      "plusmn;", "\u00b1",
      "plussim;", "\u2a26",
      "plustwo;", "\u2a27",
      "pm;", "\u00b1",
      "pointint;", "\u2a15",
      "popf;", "\ud835\udd61",
      "pound", "\u00a3",
      "pound;", "\u00a3",
      "pr;", "\u227a",
      "prE;", "\u2ab3",
      "prap;", "\u2ab7",
      "prcue;", "\u227c",
      "pre;", "\u2aaf",
      "prec;", "\u227a",
      "precapprox;", "\u2ab7",
      "preccurlyeq;", "\u227c",
      "preceq;", "\u2aaf",
      "precnapprox;", "\u2ab9",
      "precneqq;", "\u2ab5",
      "precnsim;", "\u22e8",
      "precsim;", "\u227e",
      "prime;", "\u2032",
      "primes;", "\u2119",
      "prnE;", "\u2ab5",
      "prnap;", "\u2ab9",
      "prnsim;", "\u22e8",
      "prod;", "\u220f",
      "profalar;", "\u232e",
      "profline;", "\u2312",
      "profsurf;", "\u2313",
      "prop;", "\u221d",
      "propto;", "\u221d",
      "prsim;", "\u227e",
      "prurel;", "\u22b0",
      "pscr;", "\ud835\udcc5",
      "psi;", "\u03c8",
      "puncsp;", "\u2008",
      "qfr;", "\ud835\udd2e",
      "qint;", "\u2a0c",
      "qopf;", "\ud835\udd62",
      "qprime;", "\u2057",
      "qscr;", "\ud835\udcc6",
      "quaternions;", "\u210d",
      "quatint;", "\u2a16",
      "quest;", "\u003f",
      "questeq;", "\u225f",
      "quot", "\"",
      "quot;", "\"",
      "rAarr;", "\u21db",
      "rArr;", "\u21d2",
      "rAtail;", "\u291c",
      "rBarr;", "\u290f",
      "rHar;", "\u2964",
      "race;", "\u223d\u0331",
      "racute;", "\u0155",
      "radic;", "\u221a",
      "raemptyv;", "\u29b3",
      "rang;", "\u27e9",
      "rangd;", "\u2992",
      "range;", "\u29a5",
      "rangle;", "\u27e9",
      "raquo", "\u00bb",
      "raquo;", "\u00bb",
      "rarr;", "\u2192",
      "rarrap;", "\u2975",
      "rarrb;", "\u21e5",
      "rarrbfs;", "\u2920",
      "rarrc;", "\u2933",
      "rarrfs;", "\u291e",
      "rarrhk;", "\u21aa",
      "rarrlp;", "\u21ac",
      "rarrpl;", "\u2945",
      "rarrsim;", "\u2974",
      "rarrtl;", "\u21a3",
      "rarrw;", "\u219d",
      "ratail;", "\u291a",
      "ratio;", "\u2236",
      "rationals;", "\u211a",
      "rbarr;", "\u290d",
      "rbbrk;", "\u2773",
      "rbrace;", "\u007d",
      "rbrack;", "\u005d",
      "rbrke;", "\u298c",
      "rbrksld;", "\u298e",
      "rbrkslu;", "\u2990",
      "rcaron;", "\u0159",
      "rcedil;", "\u0157",
      "rceil;", "\u2309",
      "rcub;", "\u007d",
      "rcy;", "\u0440",
      "rdca;", "\u2937",
      "rdldhar;", "\u2969",
      "rdquo;", "\u201d",
      "rdquor;", "\u201d",
      "rdsh;", "\u21b3",
      "real;", "\u211c",
      "realine;", "\u211b",
      "realpart;", "\u211c",
      "reals;", "\u211d",
      "rect;", "\u25ad",
      "reg", "\u00ae",
      "reg;", "\u00ae",
      "rfisht;", "\u297d",
      "rfloor;", "\u230b",
      "rfr;", "\ud835\udd2f",
      "rhard;", "\u21c1",
      "rharu;", "\u21c0",
      "rharul;", "\u296c",
      "rho;", "\u03c1",
      "rhov;", "\u03f1",
      "rightarrow;", "\u2192",
      "rightarrowtail;", "\u21a3",
      "rightharpoondown;", "\u21c1",
      "rightharpoonup;", "\u21c0",
      "rightleftarrows;", "\u21c4",
      "rightleftharpoons;", "\u21cc",
      "rightrightarrows;", "\u21c9",
      "rightsquigarrow;", "\u219d",
      "rightthreetimes;", "\u22cc",
      "ring;", "\u02da",
      "risingdotseq;", "\u2253",
      "rlarr;", "\u21c4",
      "rlhar;", "\u21cc",
      "rlm;", "\u200f",
      "rmoust;", "\u23b1",
      "rmoustache;", "\u23b1",
      "rnmid;", "\u2aee",
      "roang;", "\u27ed",
      "roarr;", "\u21fe",
      "robrk;", "\u27e7",
      "ropar;", "\u2986",
      "ropf;", "\ud835\udd63",
      "roplus;", "\u2a2e",
      "rotimes;", "\u2a35",
      "rpar;", "\u0029",
      "rpargt;", "\u2994",
      "rppolint;", "\u2a12",
      "rrarr;", "\u21c9",
      "rsaquo;", "\u203a",
      "rscr;", "\ud835\udcc7",
      "rsh;", "\u21b1",
      "rsqb;", "\u005d",
      "rsquo;", "\u2019",
      "rsquor;", "\u2019",
      "rthree;", "\u22cc",
      "rtimes;", "\u22ca",
      "rtri;", "\u25b9",
      "rtrie;", "\u22b5",
      "rtrif;", "\u25b8",
      "rtriltri;", "\u29ce",
      "ruluhar;", "\u2968",
      "rx;", "\u211e",
      "sacute;", "\u015b",
      "sbquo;", "\u201a",
      "sc;", "\u227b",
      "scE;", "\u2ab4",
      "scap;", "\u2ab8",
      "scaron;", "\u0161",
      "sccue;", "\u227d",
      "sce;", "\u2ab0",
      "scedil;", "\u015f",
      "scirc;", "\u015d",
      "scnE;", "\u2ab6",
      "scnap;", "\u2aba",
      "scnsim;", "\u22e9",
      "scpolint;", "\u2a13",
      "scsim;", "\u227f",
      "scy;", "\u0441",
      "sdot;", "\u22c5",
      "sdotb;", "\u22a1",
      "sdote;", "\u2a66",
      "seArr;", "\u21d8",
      "searhk;", "\u2925",
      "searr;", "\u2198",
      "searrow;", "\u2198",
      "sect", "\u00a7",
      "sect;", "\u00a7",
      "semi;", "\u003b",
      "seswar;", "\u2929",
      "setminus;", "\u2216",
      "setmn;", "\u2216",
      "sext;", "\u2736",
      "sfr;", "\ud835\udd30",
      "sfrown;", "\u2322",
      "sharp;", "\u266f",
      "shchcy;", "\u0449",
      "shcy;", "\u0448",
      "shortmid;", "\u2223",
      "shortparallel;", "\u2225",
      "shy", "\u00ad",
      "shy;", "\u00ad",
      "sigma;", "\u03c3",
      "sigmaf;", "\u03c2",
      "sigmav;", "\u03c2",
      "sim;", "\u223c",
      "simdot;", "\u2a6a",
      "sime;", "\u2243",
      "simeq;", "\u2243",
      "simg;", "\u2a9e",
      "simgE;", "\u2aa0",
      "siml;", "\u2a9d",
      "simlE;", "\u2a9f",
      "simne;", "\u2246",
      "simplus;", "\u2a24",
      "simrarr;", "\u2972",
      "slarr;", "\u2190",
      "smallsetminus;", "\u2216",
      "smashp;", "\u2a33",
      "smeparsl;", "\u29e4",
      "smid;", "\u2223",
      "smile;", "\u2323",
      "smt;", "\u2aaa",
      "smte;", "\u2aac",
      "smtes;", "\u2aac\ufe00",
      "softcy;", "\u044c",
      "sol;", "\u002f",
      "solb;", "\u29c4",
      "solbar;", "\u233f",
      "sopf;", "\ud835\udd64",
      "spades;", "\u2660",
      "spadesuit;", "\u2660",
      "spar;", "\u2225",
      "sqcap;", "\u2293",
      "sqcaps;", "\u2293\ufe00",
      "sqcup;", "\u2294",
      "sqcups;", "\u2294\ufe00",
      "sqsub;", "\u228f",
      "sqsube;", "\u2291",
      "sqsubset;", "\u228f",
      "sqsubseteq;", "\u2291",
      "sqsup;", "\u2290",
      "sqsupe;", "\u2292",
      "sqsupset;", "\u2290",
      "sqsupseteq;", "\u2292",
      "squ;", "\u25a1",
      "square;", "\u25a1",
      "squarf;", "\u25aa",
      "squf;", "\u25aa",
      "srarr;", "\u2192",
      "sscr;", "\ud835\udcc8",
      "ssetmn;", "\u2216",
      "ssmile;", "\u2323",
      "sstarf;", "\u22c6",
      "star;", "\u2606",
      "starf;", "\u2605",
      "straightepsilon;", "\u03f5",
      "straightphi;", "\u03d5",
      "strns;", "\u00af",
      "sub;", "\u2282",
      "subE;", "\u2ac5",
      "subdot;", "\u2abd",
      "sube;", "\u2286",
      "subedot;", "\u2ac3",
      "submult;", "\u2ac1",
      "subnE;", "\u2acb",
      "subne;", "\u228a",
      "subplus;", "\u2abf",
      "subrarr;", "\u2979",
      "subset;", "\u2282",
      "subseteq;", "\u2286",
      "subseteqq;", "\u2ac5",
      "subsetneq;", "\u228a",
      "subsetneqq;", "\u2acb",
      "subsim;", "\u2ac7",
      "subsub;", "\u2ad5",
      "subsup;", "\u2ad3",
      "succ;", "\u227b",
      "succapprox;", "\u2ab8",
      "succcurlyeq;", "\u227d",
      "succeq;", "\u2ab0",
      "succnapprox;", "\u2aba",
      "succneqq;", "\u2ab6",
      "succnsim;", "\u22e9",
      "succsim;", "\u227f",
      "sum;", "\u2211",
      "sung;", "\u266a",
      "sup1", "\u00b9",
      "sup1;", "\u00b9",
      "sup2", "\u00b2",
      "sup2;", "\u00b2",
      "sup3", "\u00b3",
      "sup3;", "\u00b3",
      "sup;", "\u2283",
      "supE;", "\u2ac6",
      "supdot;", "\u2abe",
      "supdsub;", "\u2ad8",
      "supe;", "\u2287",
      "supedot;", "\u2ac4",
      "suphsol;", "\u27c9",
      "suphsub;", "\u2ad7",
      "suplarr;", "\u297b",
      "supmult;", "\u2ac2",
      "supnE;", "\u2acc",
      "supne;", "\u228b",
      "supplus;", "\u2ac0",
      "supset;", "\u2283",
      "supseteq;", "\u2287",
      "supseteqq;", "\u2ac6",
      "supsetneq;", "\u228b",
      "supsetneqq;", "\u2acc",
      "supsim;", "\u2ac8",
      "supsub;", "\u2ad4",
      "supsup;", "\u2ad6",
      "swArr;", "\u21d9",
      "swarhk;", "\u2926",
      "swarr;", "\u2199",
      "swarrow;", "\u2199",
      "swnwar;", "\u292a",
      "szlig", "\u00df",
      "szlig;", "\u00df",
      "target;", "\u2316",
      "tau;", "\u03c4",
      "tbrk;", "\u23b4",
      "tcaron;", "\u0165",
      "tcedil;", "\u0163",
      "tcy;", "\u0442",
      "tdot;", "\u20db",
      "telrec;", "\u2315",
      "tfr;", "\ud835\udd31",
      "there4;", "\u2234",
      "therefore;", "\u2234",
      "theta;", "\u03b8",
      "thetasym;", "\u03d1",
      "thetav;", "\u03d1",
      "thickapprox;", "\u2248",
      "thicksim;", "\u223c",
      "thinsp;", "\u2009",
      "thkap;", "\u2248",
      "thksim;", "\u223c",
      "thorn", "\u00fe",
      "thorn;", "\u00fe",
      "tilde;", "\u02dc",
      "times", "\u00d7",
      "times;", "\u00d7",
      "timesb;", "\u22a0",
      "timesbar;", "\u2a31",
      "timesd;", "\u2a30",
      "tint;", "\u222d",
      "toea;", "\u2928",
      "top;", "\u22a4",
      "topbot;", "\u2336",
      "topcir;", "\u2af1",
      "topf;", "\ud835\udd65",
      "topfork;", "\u2ada",
      "tosa;", "\u2929",
      "tprime;", "\u2034",
      "trade;", "\u2122",
      "triangle;", "\u25b5",
      "triangledown;", "\u25bf",
      "triangleleft;", "\u25c3",
      "trianglelefteq;", "\u22b4",
      "triangleq;", "\u225c",
      "triangleright;", "\u25b9",
      "trianglerighteq;", "\u22b5",
      "tridot;", "\u25ec",
      "trie;", "\u225c",
      "triminus;", "\u2a3a",
      "triplus;", "\u2a39",
      "trisb;", "\u29cd",
      "tritime;", "\u2a3b",
      "trpezium;", "\u23e2",
      "tscr;", "\ud835\udcc9",
      "tscy;", "\u0446",
      "tshcy;", "\u045b",
      "tstrok;", "\u0167",
      "twixt;", "\u226c",
      "twoheadleftarrow;", "\u219e",
      "twoheadrightarrow;", "\u21a0",
      "uArr;", "\u21d1",
      "uHar;", "\u2963",
      "uacute", "\u00fa",
      "uacute;", "\u00fa",
      "uarr;", "\u2191",
      "ubrcy;", "\u045e",
      "ubreve;", "\u016d",
      "ucirc", "\u00fb",
      "ucirc;", "\u00fb",
      "ucy;", "\u0443",
      "udarr;", "\u21c5",
      "udblac;", "\u0171",
      "udhar;", "\u296e",
      "ufisht;", "\u297e",
      "ufr;", "\ud835\udd32",
      "ugrave", "\u00f9",
      "ugrave;", "\u00f9",
      "uharl;", "\u21bf",
      "uharr;", "\u21be",
      "uhblk;", "\u2580",
      "ulcorn;", "\u231c",
      "ulcorner;", "\u231c",
      "ulcrop;", "\u230f",
      "ultri;", "\u25f8",
      "umacr;", "\u016b",
      "uml", "\u00a8",
      "uml;", "\u00a8",
      "uogon;", "\u0173",
      "uopf;", "\ud835\udd66",
      "uparrow;", "\u2191",
      "updownarrow;", "\u2195",
      "upharpoonleft;", "\u21bf",
      "upharpoonright;", "\u21be",
      "uplus;", "\u228e",
      "upsi;", "\u03c5",
      "upsih;", "\u03d2",
      "upsilon;", "\u03c5",
      "upuparrows;", "\u21c8",
      "urcorn;", "\u231d",
      "urcorner;", "\u231d",
      "urcrop;", "\u230e",
      "uring;", "\u016f",
      "urtri;", "\u25f9",
      "uscr;", "\ud835\udcca",
      "utdot;", "\u22f0",
      "utilde;", "\u0169",
      "utri;", "\u25b5",
      "utrif;", "\u25b4",
      "uuarr;", "\u21c8",
      "uuml", "\u00fc",
      "uuml;", "\u00fc",
      "uwangle;", "\u29a7",
      "vArr;", "\u21d5",
      "vBar;", "\u2ae8",
      "vBarv;", "\u2ae9",
      "vDash;", "\u22a8",
      "vangrt;", "\u299c",
      "varepsilon;", "\u03f5",
      "varkappa;", "\u03f0",
      "varnothing;", "\u2205",
      "varphi;", "\u03d5",
      "varpi;", "\u03d6",
      "varpropto;", "\u221d",
      "varr;", "\u2195",
      "varrho;", "\u03f1",
      "varsigma;", "\u03c2",
      "varsubsetneq;", "\u228a\ufe00",
      "varsubsetneqq;", "\u2acb\ufe00",
      "varsupsetneq;", "\u228b\ufe00",
      "varsupsetneqq;", "\u2acc\ufe00",
      "vartheta;", "\u03d1",
      "vartriangleleft;", "\u22b2",
      "vartriangleright;", "\u22b3",
      "vcy;", "\u0432",
      "vdash;", "\u22a2",
      "vee;", "\u2228",
      "veebar;", "\u22bb",
      "veeeq;", "\u225a",
      "vellip;", "\u22ee",
      "verbar;", "\u007c",
      "vert;", "\u007c",
      "vfr;", "\ud835\udd33",
      "vltri;", "\u22b2",
      "vnsub;", "\u2282\u20d2",
      "vnsup;", "\u2283\u20d2",
      "vopf;", "\ud835\udd67",
      "vprop;", "\u221d",
      "vrtri;", "\u22b3",
      "vscr;", "\ud835\udccb",
      "vsubnE;", "\u2acb\ufe00",
      "vsubne;", "\u228a\ufe00",
      "vsupnE;", "\u2acc\ufe00",
      "vsupne;", "\u228b\ufe00",
      "vzigzag;", "\u299a",
      "wcirc;", "\u0175",
      "wedbar;", "\u2a5f",
      "wedge;", "\u2227",
      "wedgeq;", "\u2259",
      "weierp;", "\u2118",
      "wfr;", "\ud835\udd34",
      "wopf;", "\ud835\udd68",
      "wp;", "\u2118",
      "wr;", "\u2240",
      "wreath;", "\u2240",
      "wscr;", "\ud835\udccc",
      "xcap;", "\u22c2",
      "xcirc;", "\u25ef",
      "xcup;", "\u22c3",
      "xdtri;", "\u25bd",
      "xfr;", "\ud835\udd35",
      "xhArr;", "\u27fa",
      "xharr;", "\u27f7",
      "xi;", "\u03be",
      "xlArr;", "\u27f8",
      "xlarr;", "\u27f5",
      "xmap;", "\u27fc",
      "xnis;", "\u22fb",
      "xodot;", "\u2a00",
      "xopf;", "\ud835\udd69",
      "xoplus;", "\u2a01",
      "xotime;", "\u2a02",
      "xrArr;", "\u27f9",
      "xrarr;", "\u27f6",
      "xscr;", "\ud835\udccd",
      "xsqcup;", "\u2a06",
      "xuplus;", "\u2a04",
      "xutri;", "\u25b3",
      "xvee;", "\u22c1",
      "xwedge;", "\u22c0",
      "yacute", "\u00fd",
      "yacute;", "\u00fd",
      "yacy;", "\u044f",
      "ycirc;", "\u0177",
      "ycy;", "\u044b",
      "yen", "\u00a5",
      "yen;", "\u00a5",
      "yfr;", "\ud835\udd36",
      "yicy;", "\u0457",
      "yopf;", "\ud835\udd6a",
      "yscr;", "\ud835\udcce",
      "yucy;", "\u044e",
      "yuml", "\u00ff",
      "yuml;", "\u00ff",
      "zacute;", "\u017a",
      "zcaron;", "\u017e",
      "zcy;", "\u0437",
      "zdot;", "\u017c",
      "zeetrf;", "\u2128",
      "zeta;", "\u03b6",
      "zfr;", "\ud835\udd37",
      "zhcy;", "\u0436",
      "zigrarr;", "\u21dd",
      "zopf;", "\ud835\udd6b",
      "zscr;", "\ud835\udccf",
      "zwj;", "\u200d",
      "zwnj;", "\u200c",
    };

    final ImmutableMap.Builder builder = ImmutableMap.builder();

    int longestEntityName = 0;
    for (int i = 0, n = pairs.length; i < n; i += 2) {
      String entityName = pairs[i];
      String encodedText = pairs[i + 1];
      builder.put(entityName, encodedText);
      if (entityName.length() > longestEntityName) {
        longestEntityName = entityName.length();
      }
    }

    final Map entityNameToCodePointMap = builder.build();

    ENTITY_TRIE = new Trie(entityNameToCodePointMap);
    LONGEST_ENTITY_NAME = longestEntityName;
  }

  /**
   * Decodes any HTML entity at the given location and appends it to a string
   * builder.  This handles both named and numeric entities.  The text is
   * decoded as if it did not appear inside an attribute value.
   *
   * @param html HTML text.
   * @param offset the position of the sequence to decode in {@code html}.
   * @param limit the last position that could be part of the sequence to decode
   *    in {@code html}.
   * @param sb string builder to append to.
   * @return The offset after the end of the decoded sequence in {@code html}.
   * @deprecated specify whether html is in an attribute value by calling
   *    {@link #appendDecodedEntity(String, int, int, boolean, StringBuilder)}
   *    instead.
   */
  @Deprecated
  public static int appendDecodedEntity(
      String html, int offset, int limit, StringBuilder sb) {
    // Delegates with inAttribute == false.
    return appendDecodedEntity(html, offset, limit, false, sb);
  }

  /**
   * Decodes any HTML entity at the given location and appends it to a string
   * builder.  This handles both named and numeric entities.
   *
   * <p>If the character at {@code offset} is not {@code '&'} it is copied to
   * {@code sb} unchanged.  If it is {@code '&'} but no character reference
   * can be decoded there, a literal {@code '&'} is appended and only that one
   * character is consumed.
   *
   * <p>A numeric reference whose value is greater than
   * {@link Character#MAX_CODE_POINT} decodes to U+FFFD regardless of how many
   * digits it has.
   *
   * @param html HTML text.
   * @param offset the position of the sequence to decode in {@code html}.
   * @param limit the exclusive upper bound on the positions in {@code html}
   *    that could be part of the sequence to decode.
   * @param inAttribute true if {@code html} is the content of an attribute
   *    value.  In that case a named match that does not end in {@code ;} is
   *    rejected when the character after it is a letter, a digit or
   *    {@code =}.
   * @param sb string builder to append to.
   * @return The offset after the end of the decoded sequence in {@code html}.
   */
  public static int appendDecodedEntity(
      String html, int offset, int limit, boolean inAttribute, StringBuilder sb) {
    char ch = html.charAt(offset);
    if ('&' != ch) {
      sb.append(ch);
      return offset + 1;
    }

    // There must be at least two characters after the '&' before limit.
    if (offset + 2 >= limit) {
      sb.append('&');
      return offset + 1;
    }
    // Cap limit to limit the amount of time spent processing inputs like
    // &a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a
    limit = Math.min(limit, offset + (1 + LONGEST_ENTITY_NAME));

    // Now we know where the entity ends, and that there is at least one
    // character in the entity name
    char ch1 = html.charAt(offset + 1);
    char ch2 = html.charAt(offset + 2);
    // Stays negative unless a numeric reference is decoded successfully.
    int codepoint = -1;
    // Offset just past the decoded reference.
    int tail = limit;
    if ('#' == ch1) {
      // numeric entity
      if ('x' == ch2 || 'X' == ch2) {
        if (limit == offset + 3) {  // No digits
          sb.append('&');
          return offset + 1;
        }
        codepoint = 0;
        // hex literal
        digloop:
        for (int i = offset + 3; i < limit; ++i) {
          char digit = html.charAt(i);
          if (!isHtmlIdContinueChar(digit)) {
            if (i == offset + 3) {
              // Terminated before any digit was seen.
              codepoint = -1;
            }
            if (digit == ';') {
              // Consume the terminating semicolon.
              i += 1;
            }
            tail = i;
            break;
          }
          switch (digit & 0xfff8) {
            case 0x30: case 0x38: // ASCII 48-57 are '0'-'9'
              int decDig = digit & 0xf;
              if (decDig < 10) {
                // Stop accumulating once the value is out of range so that a
                // long run of digits cannot overflow int and wrap around to
                // a different, valid code point.
                if (codepoint <= Character.MAX_CODE_POINT) {
                  codepoint = (codepoint << 4) | decDig;
                }
              } else {
                codepoint = -1;
                break digloop;
              }
              break;
            // ASCII 65-70 and 97-102 are 'A'-'F' and 'a'-'f'
            case 0x40: case 0x60:
              int hexDig = (digit & 0x7);
              if (hexDig != 0 && hexDig < 7) {
                // Same saturation as for decimal digits above.
                if (codepoint <= Character.MAX_CODE_POINT) {
                  codepoint = (codepoint << 4) | (hexDig + 9);
                }
              } else {
                codepoint = -1;
                break digloop;
              }
              break;
            default:
              codepoint = -1;
              break digloop;
          }
        }
        if (codepoint > Character.MAX_CODE_POINT) {
          codepoint = 0xfffd;  // Unknown.
        }
      } else {
        codepoint = 0;
        // decimal literal
        digloop:
        for (int i = offset + 2; i < limit; ++i) {
          char digit = html.charAt(i);
          if (!isHtmlIdContinueChar(digit)) {
            if (i == offset + 2) {
              // Terminated before any digit was seen.
              codepoint = -1;
            }
            if (digit == ';') {
              // Consume the terminating semicolon.
              i += 1;
            }
            tail = i;
            break;
          }
          switch (digit & 0xfff8) {
            case 0x30: case 0x38: // ASCII 48-57 are '0'-'9'
              int decDig = digit - '0';
              if (decDig < 10) {
                // Stop accumulating once the value is out of range so that a
                // long run of digits cannot overflow int and wrap around to
                // a different, valid code point.
                if (codepoint <= Character.MAX_CODE_POINT) {
                  codepoint = (codepoint * 10) + decDig;
                }
              } else {
                codepoint = -1;
                break digloop;
              }
              break;
            default:
              codepoint = -1;
              break digloop;
          }
        }
        if (codepoint > Character.MAX_CODE_POINT) {
          codepoint = 0xfffd;  // Unknown.
        }
      }
    } else {
      // Named entity: walk the trie and remember the longest acceptable match.
      Trie longestDecode = null;
      Trie t = ENTITY_TRIE;
      for (int i = offset + 1; i < limit; ++i) {
        char nameChar = html.charAt(i);
        t = t.lookup(nameChar);
        if (t == null) { break; }
        if (t.isTerminal() && mayComplete(inAttribute, html, i, limit)) {
          longestDecode = t;
          tail = i + 1;
        }
      }
      // Try again, case insensitively.
      if (longestDecode == null) {
        t = ENTITY_TRIE;
        for (int i = offset + 1; i < limit; ++i) {
          char nameChar = html.charAt(i);
          // Fold ASCII upper-case letters to lower case.
          if ('Z' >= nameChar && nameChar >= 'A') { nameChar |= 32; }
          t = t.lookup(nameChar);
          if (t == null) { break; }
          if (t.isTerminal() && mayComplete(inAttribute, html, i, limit)) {
            longestDecode = t;
            tail = i + 1;
          }
        }
      }
      if (longestDecode != null) {
        sb.append(longestDecode.getValue());
        return tail;
      }
    }
    if (codepoint < 0) {
      // Nothing could be decoded: emit the ampersand literally.
      sb.append('&');
      return offset + 1;
    } else {
      sb.appendCodePoint(codepoint);
      return tail;
    }
  }

  /**
   * True for ASCII letters, ASCII digits and the hyphen-minus character.
   *
   * @param ch the character to classify.
   * @return whether {@code ch} is one of {@code [0-9A-Za-z-]}.
   */
  private static boolean isHtmlIdContinueChar(char ch) {
    if (ch == '-') {
      return true;
    }
    if (ch >= '0' && ch <= '9') {
      return true;
    }
    // Setting bit 5 maps ASCII upper-case letters onto their lower-case forms.
    int folded = ch | 0x20;
    return folded >= 'a' && folded <= 'z';
  }

  /**
   * True if the character at {@code i} in {@code html} may complete a named
   * character reference.
   *
   * <p>Outside attribute values the answer is always true.  Inside an
   * attribute value, a match that does not end in {@code ;} is refused when
   * the following character, if there is one before {@code limit}, would
   * continue a character reference name.
   *
   * @param inAttribute whether {@code html} is the content of an attribute.
   * @param html HTML text.
   * @param i index of the last character of the candidate match.
   * @param limit exclusive bound on the indices that may be examined.
   * @return whether the candidate match may be accepted.
   */
  private static boolean mayComplete(boolean inAttribute, String html, int i, int limit) {
    if (!inAttribute) {
      return true;
    }
    if (html.charAt(i) == ';') {
      return true;
    }
    int next = i + 1;
    if (next >= limit) {
      return true;
    }
    // The next character may block treating this as a full match.
    // This avoids problems like "&para" being decoded inside an attribute
    // value such as
    //     ?foo&param=1
    return !continuesCharacterReferenceName(html.charAt(next));
  }

  /**
   * True for ASCII letters, ASCII digits and the equals sign.
   * See the comments in issue 254.
   *
   * @param ch the character that follows a candidate reference name.
   * @return whether {@code ch} is one of {@code [0-9A-Za-z=]}.
   */
  private static boolean continuesCharacterReferenceName(char ch) {
    if (ch == '=') {
      return true;
    }
    if (ch >= '0' && ch <= '9') {
      return true;
    }
    // Setting bit 5 maps ASCII upper-case letters onto their lower-case forms.
    int folded = ch | 0x20;
    return folded >= 'a' && folded <= 'z';
  }

//  /** A possible entity name like "amp" or "gt". */
//  public static boolean isEntityName(String name) {
//    Trie t = ENTITY_TRIE;
//    int n = name.length();
//
//    // Treat AMP the same as amp, but not Amp.
//    boolean isUcase = true;
//    for (int i = 0; i < n; ++i) {
//      char ch = name.charAt(i);
//      if (!('A' <= ch && ch <= 'Z')) {
//        isUcase = false;
//        break;
//      }
//    }
//
//    if (isUcase) { name = Strings.toLowerCase(name); }
//
//    for (int i = 0; i < n; ++i) {
//      t = t.lookup(name.charAt(i));
//      if (t == null) { return false; }
//    }
//    return t.isTerminal();
//  }

  /** Private so that this class of static utilities cannot be instantiated. */
  private HtmlEntities() {
    // uninstantiable
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy