All downloads are free. Search and download functionality uses the official Maven repository.

twitter4j.HTMLEntity Maven / Gradle / Ivy

There is a newer version: 4.1.2
Show newest version
/*
 * Copyright 2007 Yusuke Yamamoto
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package twitter4j;

import twitter4j.v1.HashtagEntity;
import twitter4j.v1.MediaEntity;
import twitter4j.v1.URLEntity;
import twitter4j.v1.UserMentionEntity;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

final class HTMLEntity {

    static String escape(String original) {
        StringBuilder buf = new StringBuilder(original);
        escape(buf);
        return buf.toString();
    }

    static void escape(StringBuilder original) {
        int index = 0;
        String escaped;
        while (index < original.length()) {
            escaped = entityEscapeMap.get(original.substring(index, index + 1));
            if (escaped != null) {
                original.replace(index, index + 1, escaped);
                index += escaped.length();
            } else {
                index++;
            }
        }
    }

    static String unescape(String original) {
        String returnValue = null;
        if (original != null) {
            StringBuilder buf = new StringBuilder(original);
            unescape(buf);
            returnValue = buf.toString();
        }
        return returnValue;
    }

    static void unescape(StringBuilder original) {
        int index = 0;
        int semicolonIndex;
        String escaped;
        String entity;
        while (index < original.length()) {
            index = original.indexOf("&", index);
            if (-1 == index) {
                break;
            }
            semicolonIndex = original.indexOf(";", index);
            if (-1 != semicolonIndex) {
                escaped = original.substring(index, semicolonIndex + 1);
                entity = escapeEntityMap.get(escaped);
                if (entity != null) {
                    original.replace(index, semicolonIndex + 1, entity);
                }
                index++;
            } else {
                break;
            }
        }
    }

    /**
     * @author Yusuke Yamamoto - yusuke at mac.com
     * @author Philip Hachey - philip dot hachey at gmail dot com
     */
    static String unescapeAndSlideEntityIncdices(String text, UserMentionEntity[] userMentionEntities,
                                                 URLEntity[] urlEntities, HashtagEntity[] hashtagEntities,
                                                 MediaEntity[] mediaEntities) {

        int entityIndexesLength = 0;
        entityIndexesLength += userMentionEntities == null ? 0 : userMentionEntities.length;
        entityIndexesLength += urlEntities == null ? 0 : urlEntities.length;
        entityIndexesLength += hashtagEntities == null ? 0 : hashtagEntities.length;
        entityIndexesLength += mediaEntities == null ? 0 : mediaEntities.length;

        EntityIndex[] entityIndexes = new EntityIndex[entityIndexesLength];
        int copyStartIndex = 0;
        if (userMentionEntities != null) {
            System.arraycopy(userMentionEntities, 0, entityIndexes, copyStartIndex, userMentionEntities.length);
            copyStartIndex += userMentionEntities.length;
        }

        if (urlEntities != null) {
            System.arraycopy(urlEntities, 0, entityIndexes, copyStartIndex, urlEntities.length);
            copyStartIndex += urlEntities.length;
        }

        if (hashtagEntities != null) {
            System.arraycopy(hashtagEntities, 0, entityIndexes, copyStartIndex, hashtagEntities.length);
            copyStartIndex += hashtagEntities.length;
        }

        if (mediaEntities != null) {
            System.arraycopy(mediaEntities, 0, entityIndexes, copyStartIndex, mediaEntities.length);
        }

        Arrays.sort(entityIndexes);
        boolean handlingStart = true;
        int entityIndex = 0;

        int delta = 0;
        int semicolonIndex;
        String escaped;
        String entity;
        StringBuilder unescaped = new StringBuilder(text.length());

        /*
         * Slide indices of twitter entities not only when replacing character
         * entity references but also adjust the twitter code point based
         * indexes with Java standard character indexes. See: HTMLEntityTest.
         * testUnescapeAndSlideEntityIncdicesWithSurrogateCodePoints
         */
        int textCodePointLength = text.codePointCount(0, text.length());
        int codePoint;
        for (int index = 0, twitterIndex = 0; index < text.length(); index +=
                Character.charCount(codePoint), twitterIndex++) {
            codePoint = text.codePointAt(index);
            if (codePoint == '&') {
                semicolonIndex = text.indexOf(";", index);
                if (-1 != semicolonIndex) {
                    escaped = text.substring(index, semicolonIndex + 1);
                    entity = escapeEntityMap.get(escaped);
                    if (entity != null) {
                        unescaped.append(entity);
                        index = semicolonIndex;
                        twitterIndex = text.codePointCount(0, semicolonIndex);
                        delta = 1 - escaped.length();
                    } else {
                        unescaped.appendCodePoint(codePoint);
                    }
                } else {
                    unescaped.appendCodePoint(codePoint);
                }
            } else {
                unescaped.appendCodePoint(codePoint);
            }
            if (entityIndex < entityIndexes.length) {
                if (handlingStart) {
                    if (entityIndexes[entityIndex].getStart() == (delta + twitterIndex)) {
                        entityIndexes[entityIndex]
                                .setStart(unescaped.length() - Character.charCount(text.codePointAt(index)));
                        handlingStart = false;
                    }
                } else if (entityIndexes[entityIndex].getEnd() == (delta + twitterIndex)) {
                    entityIndexes[entityIndex]
                            .setEnd(unescaped.length() - Character.charCount(text.codePointAt(index)));
                    entityIndex++;
                    handlingStart = true;
                }
            }
            delta = 0;
        }
        if (entityIndex < entityIndexes.length) {
            if (entityIndexes[entityIndex].getEnd() == textCodePointLength) {
                entityIndexes[entityIndex].setEnd(unescaped.length());
            }
        }

        return unescaped.toString();
    }

    private static final Map entityEscapeMap = new HashMap<>();
    private static final Map escapeEntityMap = new HashMap<>();

    static {
        String[][] entities =
                {{" ", " "/* no-break space = non-breaking space */, "\u00A0"}
                        , {"¡", "¡"/* inverted exclamation mark */, "\u00A1"}
                        , {"¢", "¢"/* cent sign */, "\u00A2"}
                        , {"£", "£"/* pound sign */, "\u00A3"}
                        , {"¤", "¤"/* currency sign */, "\u00A4"}
                        , {"¥", "¥"/* yen sign = yuan sign */, "\u00A5"}
                        , {"¦", "¦"/* broken bar = broken vertical bar */, "\u00A6"}
                        , {"§", "§"/* section sign */, "\u00A7"}
                        , {"¨", "¨"/* diaeresis = spacing diaeresis */, "\u00A8"}
                        , {"©", "©"/* copyright sign */, "\u00A9"}
                        , {"ª", "ª"/* feminine ordinal indicator */, "\u00AA"}
                        , {"«", "«"/* left-pointing double angle quotation mark = left pointing guillemet */, "\u00AB"}
                        , {"¬", "¬"/* not sign = discretionary hyphen */, "\u00AC"}
                        , {"­", "­"/* soft hyphen = discretionary hyphen */, "\u00AD"}
                        , {"®", "®"/* registered sign = registered trade mark sign */, "\u00AE"}
                        , {"¯", "¯"/* macron = spacing macron = overline = APL overbar */, "\u00AF"}
                        , {"°", "°"/* degree sign */, "\u00B0"}
                        , {"±", "±"/* plus-minus sign = plus-or-minus sign */, "\u00B1"}
                        , {"²", "²"/* superscript two = superscript digit two = squared */, "\u00B2"}
                        , {"³", "³"/* superscript three = superscript digit three = cubed */, "\u00B3"}
                        , {"´", "´"/* acute accent = spacing acute */, "\u00B4"}
                        , {"µ", "µ"/* micro sign */, "\u00B5"}
                        , {"¶", "¶"/* pilcrow sign = paragraph sign */, "\u00B6"}
                        , {"·", "·"/* middle dot = Georgian comma = Greek middle dot */, "\u00B7"}
                        , {"¸", "¸"/* cedilla = spacing cedilla */, "\u00B8"}
                        , {"¹", "¹"/* superscript one = superscript digit one */, "\u00B9"}
                        , {"º", "º"/* masculine ordinal indicator */, "\u00BA"}
                        , {"»", "»"/* right-pointing double angle quotation mark = right pointing guillemet */, "\u00BB"}
                        , {"¼", "¼"/* vulgar fraction one quarter = fraction one quarter */, "\u00BC"}
                        , {"½", "½"/* vulgar fraction one half = fraction one half */, "\u00BD"}
                        , {"¾", "¾"/* vulgar fraction three quarters = fraction three quarters */, "\u00BE"}
                        , {"¿", "¿"/* inverted question mark = turned question mark */, "\u00BF"}
                        , {"À", "À"/* latin capital letter A with grave = latin capital letter A grave */, "\u00C0"}
                        , {"Á", "Á"/* latin capital letter A with acute */, "\u00C1"}
                        , {"Â", "Â"/* latin capital letter A with circumflex */, "\u00C2"}
                        , {"Ã", "Ã"/* latin capital letter A with tilde */, "\u00C3"}
                        , {"Ä", "Ä"/* latin capital letter A with diaeresis */, "\u00C4"}
                        , {"Å", "Å"/* latin capital letter A with ring above = latin capital letter A ring */, "\u00C5"}
                        , {"Æ", "Æ"/* latin capital letter AE = latin capital ligature AE */, "\u00C6"}
                        , {"Ç", "Ç"/* latin capital letter C with cedilla */, "\u00C7"}
                        , {"È", "È"/* latin capital letter E with grave */, "\u00C8"}
                        , {"É", "É"/* latin capital letter E with acute */, "\u00C9"}
                        , {"Ê", "Ê"/* latin capital letter E with circumflex */, "\u00CA"}
                        , {"Ë", "Ë"/* latin capital letter E with diaeresis */, "\u00CB"}
                        , {"Ì", "Ì"/* latin capital letter I with grave */, "\u00CC"}
                        , {"Í", "Í"/* latin capital letter I with acute */, "\u00CD"}
                        , {"Î", "Î"/* latin capital letter I with circumflex */, "\u00CE"}
                        , {"Ï", "Ï"/* latin capital letter I with diaeresis */, "\u00CF"}
                        , {"Ð", "Ð"/* latin capital letter ETH */, "\u00D0"}
                        , {"Ñ", "Ñ"/* latin capital letter N with tilde */, "\u00D1"}
                        , {"Ò", "Ò"/* latin capital letter O with grave */, "\u00D2"}
                        , {"Ó", "Ó"/* latin capital letter O with acute */, "\u00D3"}
                        , {"Ô", "Ô"/* latin capital letter O with circumflex */, "\u00D4"}
                        , {"Õ", "Õ"/* latin capital letter O with tilde */, "\u00D5"}
                        , {"Ö", "Ö"/* latin capital letter O with diaeresis */, "\u00D6"}
                        , {"×", "×"/* multiplication sign */, "\u00D7"}
                        , {"Ø", "Ø"/* latin capital letter O with stroke = latin capital letter O slash */, "\u00D8"}
                        , {"Ù", "Ù"/* latin capital letter U with grave */, "\u00D9"}
                        , {"Ú", "Ú"/* latin capital letter U with acute */, "\u00DA"}
                        , {"Û", "Û"/* latin capital letter U with circumflex */, "\u00DB"}
                        , {"Ü", "Ü"/* latin capital letter U with diaeresis */, "\u00DC"}
                        , {"Ý", "Ý"/* latin capital letter Y with acute */, "\u00DD"}
                        , {"Þ", "Þ"/* latin capital letter THORN */, "\u00DE"}
                        , {"ß", "ß"/* latin small letter sharp s = ess-zed */, "\u00DF"}
                        , {"à", "à"/* latin small letter a with grave = latin small letter a grave */, "\u00E0"}
                        , {"á", "á"/* latin small letter a with acute */, "\u00E1"}
                        , {"â", "â"/* latin small letter a with circumflex */, "\u00E2"}
                        , {"ã", "ã"/* latin small letter a with tilde */, "\u00E3"}
                        , {"ä", "ä"/* latin small letter a with diaeresis */, "\u00E4"}
                        , {"å", "å"/* latin small letter a with ring above = latin small letter a ring */, "\u00E5"}
                        , {"æ", "æ"/* latin small letter ae = latin small ligature ae */, "\u00E6"}
                        , {"ç", "ç"/* latin small letter c with cedilla */, "\u00E7"}
                        , {"è", "è"/* latin small letter e with grave */, "\u00E8"}
                        , {"é", "é"/* latin small letter e with acute */, "\u00E9"}
                        , {"ê", "ê"/* latin small letter e with circumflex */, "\u00EA"}
                        , {"ë", "ë"/* latin small letter e with diaeresis */, "\u00EB"}
                        , {"ì", "ì"/* latin small letter i with grave */, "\u00EC"}
                        , {"í", "í"/* latin small letter i with acute */, "\u00ED"}
                        , {"î", "î"/* latin small letter i with circumflex */, "\u00EE"}
                        , {"ï", "ï"/* latin small letter i with diaeresis */, "\u00EF"}
                        , {"ð", "ð"/* latin small letter eth */, "\u00F0"}
                        , {"ñ", "ñ"/* latin small letter n with tilde */, "\u00F1"}
                        , {"ò", "ò"/* latin small letter o with grave */, "\u00F2"}
                        , {"ó", "ó"/* latin small letter o with acute */, "\u00F3"}
                        , {"ô", "ô"/* latin small letter o with circumflex */, "\u00F4"}
                        , {"õ", "õ"/* latin small letter o with tilde */, "\u00F5"}
                        , {"ö", "ö"/* latin small letter o with diaeresis */, "\u00F6"}
                        , {"÷", "÷"/* division sign */, "\u00F7"}
                        , {"ø", "ø"/* latin small letter o with stroke = latin small letter o slash */, "\u00F8"}
                        , {"ù", "ù"/* latin small letter u with grave */, "\u00F9"}
                        , {"ú", "ú"/* latin small letter u with acute */, "\u00FA"}
                        , {"û", "û"/* latin small letter u with circumflex */, "\u00FB"}
                        , {"ü", "ü"/* latin small letter u with diaeresis */, "\u00FC"}
                        , {"ý", "ý"/* latin small letter y with acute */, "\u00FD"}
                        , {"þ", "þ"/* latin small letter thorn with */, "\u00FE"}
                        , {"ÿ", "ÿ"/* latin small letter y with diaeresis */, "\u00FF"}
                        , {"ƒ", "ƒ"/* latin small f with hook = function = florin */, "\u0192"}
/* Greek */
                        , {"Α", "Α"/* greek capital letter alpha */, "\u0391"}
                        , {"Β", "Β"/* greek capital letter beta */, "\u0392"}
                        , {"Γ", "Γ"/* greek capital letter gamma */, "\u0393"}
                        , {"Δ", "Δ"/* greek capital letter delta */, "\u0394"}
                        , {"Ε", "Ε"/* greek capital letter epsilon */, "\u0395"}
                        , {"Ζ", "Ζ"/* greek capital letter zeta */, "\u0396"}
                        , {"Η", "Η"/* greek capital letter eta */, "\u0397"}
                        , {"Θ", "Θ"/* greek capital letter theta */, "\u0398"}
                        , {"Ι", "Ι"/* greek capital letter iota */, "\u0399"}
                        , {"Κ", "Κ"/* greek capital letter kappa */, "\u039A"}
                        , {"Λ", "Λ"/* greek capital letter lambda */, "\u039B"}
                        , {"Μ", "Μ"/* greek capital letter mu */, "\u039C"}
                        , {"Ν", "Ν"/* greek capital letter nu */, "\u039D"}
                        , {"Ξ", "Ξ"/* greek capital letter xi */, "\u039E"}
                        , {"Ο", "Ο"/* greek capital letter omicron */, "\u039F"}
                        , {"Π", "Π"/* greek capital letter pi */, "\u03A0"}
                        , {"Ρ", "Ρ"/* greek capital letter rho */, "\u03A1"}
/* there is no Sigmaf and no \u03A2 */
                        , {"Σ", "Σ"/* greek capital letter sigma */, "\u03A3"}
                        , {"Τ", "Τ"/* greek capital letter tau */, "\u03A4"}
                        , {"Υ", "Υ"/* greek capital letter upsilon */, "\u03A5"}
                        , {"Φ", "Φ"/* greek capital letter phi */, "\u03A6"}
                        , {"Χ", "Χ"/* greek capital letter chi */, "\u03A7"}
                        , {"Ψ", "Ψ"/* greek capital letter psi */, "\u03A8"}
                        , {"Ω", "Ω"/* greek capital letter omega */, "\u03A9"}
                        , {"α", "α"/* greek small letter alpha */, "\u03B1"}
                        , {"β", "β"/* greek small letter beta */, "\u03B2"}
                        , {"γ", "γ"/* greek small letter gamma */, "\u03B3"}
                        , {"δ", "δ"/* greek small letter delta */, "\u03B4"}
                        , {"ε", "ε"/* greek small letter epsilon */, "\u03B5"}
                        , {"ζ", "ζ"/* greek small letter zeta */, "\u03B6"}
                        , {"η", "η"/* greek small letter eta */, "\u03B7"}
                        , {"θ", "θ"/* greek small letter theta */, "\u03B8"}
                        , {"ι", "ι"/* greek small letter iota */, "\u03B9"}
                        , {"κ", "κ"/* greek small letter kappa */, "\u03BA"}
                        , {"λ", "λ"/* greek small letter lambda */, "\u03BB"}
                        , {"μ", "μ"/* greek small letter mu */, "\u03BC"}
                        , {"ν", "ν"/* greek small letter nu */, "\u03BD"}
                        , {"ξ", "ξ"/* greek small letter xi */, "\u03BE"}
                        , {"ο", "ο"/* greek small letter omicron */, "\u03BF"}
                        , {"π", "π"/* greek small letter pi */, "\u03C0"}
                        , {"ρ", "ρ"/* greek small letter rho */, "\u03C1"}
                        , {"ς", "ς"/* greek small letter final sigma */, "\u03C2"}
                        , {"σ", "σ"/* greek small letter sigma */, "\u03C3"}
                        , {"τ", "τ"/* greek small letter tau */, "\u03C4"}
                        , {"υ", "υ"/* greek small letter upsilon */, "\u03C5"}
                        , {"φ", "φ"/* greek small letter phi */, "\u03C6"}
                        , {"χ", "χ"/* greek small letter chi */, "\u03C7"}
                        , {"ψ", "ψ"/* greek small letter psi */, "\u03C8"}
                        , {"ω", "ω"/* greek small letter omega */, "\u03C9"}
                        , {"ϑ", "ϑ"/* greek small letter theta symbol */, "\u03D1"}
                        , {"ϒ", "ϒ"/* greek upsilon with hook symbol */, "\u03D2"}
                        , {"ϖ", "ϖ"/* greek pi symbol */, "\u03D6"}
/* General Punctuation */
                        , {"•", "•"/* bullet = black small circle */, "\u2022"}
/* bullet is NOT the same as bullet operator  ,"\u2219*/
                        , {"…", "…"/* horizontal ellipsis = three dot leader */, "\u2026"}
                        , {"′", "′"/* prime = minutes = feet */, "\u2032"}
                        , {"″", "″"/* double prime = seconds = inches */, "\u2033"}
                        , {"‾", "‾"/* overline = spacing overscore */, "\u203E"}
                        , {"⁄", "⁄"/* fraction slash */, "\u2044"}
/* Letterlike Symbols */
                        , {"℘", "℘"/* script capital P = power set = Weierstrass p */, "\u2118"}
                        , {"ℑ", "ℑ"/* blackletter capital I = imaginary part */, "\u2111"}
                        , {"ℜ", "ℜ"/* blackletter capital R = real part symbol */, "\u211C"}
                        , {"™", "™"/* trade mark sign */, "\u2122"}
                        , {"ℵ", "ℵ"/* alef symbol = first transfinite cardinal */, "\u2135"}
/* alef symbol is NOT the same as hebrew letter alef  ,"\u05D0"}*/
/* Arrows */
                        , {"←", "←"/* leftwards arrow */, "\u2190"}
                        , {"↑", "↑"/* upwards arrow */, "\u2191"}
                        , {"→", "→"/* rightwards arrow */, "\u2192"}
                        , {"↓", "↓"/* downwards arrow */, "\u2193"}
                        , {"↔", "↔"/* left right arrow */, "\u2194"}
                        , {"↵", "↵"/* downwards arrow with corner leftwards = carriage return */, "\u21B5"}
                        , {"⇐", "⇐"/* leftwards double arrow */, "\u21D0"}
/* Unicode does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function. So ? lArr can be used for 'is implied by' as ISOtech suggests */
                        , {"⇑", "⇑"/* upwards double arrow */, "\u21D1"}
                        , {"⇒", "⇒"/* rightwards double arrow */, "\u21D2"}
/* Unicode does not say this is the 'implies' character but does not have another character with this function so ? rArr can be used for 'implies' as ISOtech suggests */
                        , {"⇓", "⇓"/* downwards double arrow */, "\u21D3"}
                        , {"⇔", "⇔"/* left right double arrow */, "\u21D4"}
/* Mathematical Operators */
                        , {"∀", "∀"/* for all */, "\u2200"}
                        , {"∂", "∂"/* partial differential */, "\u2202"}
                        , {"∃", "∃"/* there exists */, "\u2203"}
                        , {"∅", "∅"/* empty set = null set = diameter */, "\u2205"}
                        , {"∇", "∇"/* nabla = backward difference */, "\u2207"}
                        , {"∈", "∈"/* element of */, "\u2208"}
                        , {"∉", "∉"/* not an element of */, "\u2209"}
                        , {"∋", "∋"/* contains as member */, "\u220B"}
/* should there be a more memorable name than 'ni'? */
                        , {"∏", "∏"/* n-ary product = product sign */, "\u220F"}
/* prod is NOT the same character as ,"\u03A0"}*/
                        , {"∑", "∑"/* n-ary sumation */, "\u2211"}
/* sum is NOT the same character as ,"\u03A3"}*/
                        , {"−", "−"/* minus sign */, "\u2212"}
                        , {"∗", "∗"/* asterisk operator */, "\u2217"}
                        , {"√", "√"/* square root = radical sign */, "\u221A"}
                        , {"∝", "∝"/* proportional to */, "\u221D"}
                        , {"∞", "∞"/* infinity */, "\u221E"}
                        , {"∠", "∠"/* angle */, "\u2220"}
                        , {"∧", "∧"/* logical and = wedge */, "\u2227"}
                        , {"∨", "∨"/* logical or = vee */, "\u2228"}
                        , {"∩", "∩"/* intersection = cap */, "\u2229"}
                        , {"∪", "∪"/* union = cup */, "\u222A"}
                        , {"∫", "∫"/* integral */, "\u222B"}
                        , {"∴", "∴"/* therefore */, "\u2234"}
                        , {"∼", "∼"/* tilde operator = varies with = similar to */, "\u223C"}
/* tilde operator is NOT the same character as the tilde  ,"\u007E"}*/
                        , {"≅", "≅"/* approximately equal to */, "\u2245"}
                        , {"≈", "≈"/* almost equal to = asymptotic to */, "\u2248"}
                        , {"≠", "≠"/* not equal to */, "\u2260"}
                        , {"≡", "≡"/* identical to */, "\u2261"}
                        , {"≤", "≤"/* less-than or equal to */, "\u2264"}
                        , {"≥", "≥"/* greater-than or equal to */, "\u2265"}
                        , {"⊂", "⊂"/* subset of */, "\u2282"}
                        , {"⊃", "⊃"/* superset of */, "\u2283"}
/* note that nsup  'not a superset of  ,"\u2283"}*/
                        , {"⊆", "⊆"/* subset of or equal to */, "\u2286"}
                        , {"⊇", "⊇"/* superset of or equal to */, "\u2287"}
                        , {"⊕", "⊕"/* circled plus = direct sum */, "\u2295"}
                        , {"⊗", "⊗"/* circled times = vector product */, "\u2297"}
                        , {"⊥", "⊥"/* up tack = orthogonal to = perpendicular */, "\u22A5"}
                        , {"⋅", "⋅"/* dot operator */, "\u22C5"}
/* dot operator is NOT the same character as ,"\u00B7"}
/* Miscellaneous Technical */
                        , {"⌈", "⌈"/* left ceiling = apl upstile */, "\u2308"}
                        , {"⌉", "⌉"/* right ceiling */, "\u2309"}
                        , {"⌊", "⌊"/* left floor = apl downstile */, "\u230A"}
                        , {"⌋", "⌋"/* right floor */, "\u230B"}
                        , {"⟨", "〈"/* left-pointing angle bracket = bra */, "\u2329"}
/* lang is NOT the same character as ,"\u003C"}*/
                        , {"⟩", "〉"/* right-pointing angle bracket = ket */, "\u232A"}
/* rang is NOT the same character as ,"\u003E"}*/
/* Geometric Shapes */
                        , {"◊", "◊"/* lozenge */, "\u25CA"}
/* Miscellaneous Symbols */
                        , {"♠", "♠"/* black spade suit */, "\u2660"}
/* black here seems to mean filled as opposed to hollow */
                        , {"♣", "♣"/* black club suit = shamrock */, "\u2663"}
                        , {"♥", "♥"/* black heart suit = valentine */, "\u2665"}
                        , {"♦", "♦"/* black diamond suit */, "\u2666"}
                        , {""", """ /* quotation mark = APL quote */, "\""}
                        , {"&", "&" /* ampersand */, "\u0026"}
                        , {"<", "<" /* less-than sign */, "\u003C"}
                        , {">", ">" /* greater-than sign */, "\u003E"}
/* Latin Extended-A */
                        , {"Œ", "Œ" /* latin capital ligature OE */, "\u0152"}
                        , {"œ", "œ" /* latin small ligature oe */, "\u0153"}
/* ligature is a misnomer  this is a separate character in some languages */
                        , {"Š", "Š" /* latin capital letter S with caron */, "\u0160"}
                        , {"š", "š" /* latin small letter s with caron */, "\u0161"}
                        , {"Ÿ", "Ÿ" /* latin capital letter Y with diaeresis */, "\u0178"}
/* Spacing Modifier Letters */
                        , {"ˆ", "ˆ" /* modifier letter circumflex accent */, "\u02C6"}
                        , {"˜", "˜" /* small tilde */, "\u02DC"}
/* General Punctuation */
                        , {" ", " "/* en space */, "\u2002"}
                        , {" ", " "/* em space */, "\u2003"}
                        , {" ", " "/* thin space */, "\u2009"}
                        , {"‌", "‌"/* zero width non-joiner */, "\u200C"}
                        , {"‍", "‍"/* zero width joiner */, "\u200D"}
                        , {"‎", "‎"/* left-to-right mark */, "\u200E"}
                        , {"‏", "‏"/* right-to-left mark */, "\u200F"}
                        , {"–", "–"/* en dash */, "\u2013"}
                        , {"—", "—"/* em dash */, "\u2014"}
                        , {"‘", "‘"/* left single quotation mark */, "\u2018"}
                        , {"’", "’"/* right single quotation mark */, "\u2019"}
                        , {"‚", "‚"/* single low-9 quotation mark */, "\u201A"}
                        , {"“", "“"/* left double quotation mark */, "\u201C"}
                        , {"”", "”"/* right double quotation mark */, "\u201D"}
                        , {"„", "„"/* double low-9 quotation mark */, "\u201E"}
                        , {"†", "†"/* dagger */, "\u2020"}
                        , {"‡", "‡"/* double dagger */, "\u2021"}
                        , {"‰", "‰"/* per mille sign */, "\u2030"}
                        , {"‹", "‹"/* single left-pointing angle quotation mark */, "\u2039"}
/* lsaquo is proposed but not yet ISO standardized */
                        , {"›", "›"/* single right-pointing angle quotation mark */, "\u203A"}
/* rsaquo is proposed but not yet ISO standardized */
                        , {"€", "€" /* euro sign */, "\u20AC"}};
        for (String[] entity : entities) {
            entityEscapeMap.put(entity[2], entity[0]);
            escapeEntityMap.put(entity[0], entity[2]);
            escapeEntityMap.put(entity[1], entity[2]);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy