// All downloads are free. Search and download functionality uses the official Maven repository.

// org.codehaus.plexus.util.xml.pull.EntityReplacementMap (Maven / Gradle / Ivy)

package org.codehaus.plexus.util.xml.pull;

/*
 * Copyright The Codehaus Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Fixed-size table of named entities and their replacement text.
 * <p>
 * Every entry is stored in several parallel arrays that share the same index: the entity name as a
 * {@code String} and as a {@code char[]}, the replacement text as a {@code String} and as a
 * {@code char[]}, and a cheap hash of the name (see {@link #fastHash(char[], int, int)}).
 * The arrays are filled once by the constructor; {@link #entityEnd} is the number of entries stored.
 * <p>
 * NOTE(review): the arrays are package-private and presumably read directly by a parser in this
 * package - confirm against the callers before changing their layout or the hash formula.
 */
public class EntityReplacementMap {
    /** Entity names, one per entry. */
    final String[] entityName;

    /** Entity names as character arrays, index-aligned with {@link #entityName}. */
    final char[][] entityNameBuf;

    /** Replacement text, index-aligned with {@link #entityName}. */
    final String[] entityReplacement;

    /** Replacement text as character arrays, index-aligned with {@link #entityName}. */
    final char[][] entityReplacementBuf;

    /** Number of entries defined so far; also the index of the next free slot. */
    int entityEnd;

    /** {@link #fastHash(char[], int, int)} of each entity name, index-aligned with {@link #entityName}. */
    final int[] entityNameHash;

    /**
     * Builds a map from a list of {@code {name, replacementText}} pairs.
     * <p>
     * Pairs are defined in array order. A replacement text of the form {@code &name;} that refers to
     * an entity defined earlier in the same array is stored as that entity's replacement text.
     *
     * @param replacements one row per entity; element 0 is the entity name, element 1 its replacement
     *        text. Rows must have at least two non-null elements.
     */
    public EntityReplacementMap(String[][] replacements) {
        int length = replacements.length;
        entityName = new String[length];
        entityNameBuf = new char[length][];
        entityReplacement = new String[length];
        entityReplacementBuf = new char[length][];
        entityNameHash = new int[length];

        for (String[] replacement : replacements) {
            defineEntityReplacementText(replacement[0], replacement[1]);
        }
    }

    /**
     * Stores one entity in the next free slot of the parallel arrays.
     * <p>
     * If {@code replacementText} is a named entity reference ({@code &name;}) and {@code name} matches
     * an entity that is already defined, the referenced entity's replacement text is stored instead.
     * When several earlier entries share that name, the last one wins. Any other text, including
     * numeric character references starting with {@code &#}, is stored unchanged.
     *
     * @param entityName the entity name, without the surrounding {@code &} and {@code ;}
     * @param replacementText the text the entity stands for
     */
    private void defineEntityReplacementText(String entityName, String replacementText) {
        if (isNamedEntityReference(replacementText)) {
            // Strip the leading '&' and the trailing ';' to get the referenced name.
            String referencedName = replacementText.substring(1, replacementText.length() - 1);
            // Only slots below entityEnd are populated, so only earlier definitions can match.
            for (int i = 0; i < entityEnd; i++) {
                if (this.entityName[i].equals(referencedName)) {
                    replacementText = entityReplacement[i];
                }
            }
        }

        char[] entityNameCharData = entityName.toCharArray();
        this.entityName[entityEnd] = newString(entityNameCharData, 0, entityName.length());
        entityNameBuf[entityEnd] = entityNameCharData;

        entityReplacement[entityEnd] = replacementText;
        entityReplacementBuf[entityEnd] = replacementText.toCharArray();
        entityNameHash[entityEnd] = fastHash(entityNameCharData, 0, entityNameCharData.length);
        ++entityEnd;
        // TODO disallow < or & in entity replacement text (or ]]>???)
        // TODO keepEntityNormalizedForAttributeValue cached as well ...
    }

    /**
     * Tells whether {@code text} has the shape of a named entity reference: it starts with
     * {@code &} (but not {@code &#}, which introduces a numeric character reference) and ends
     * with {@code ;}.
     * <p>
     * A text that satisfies both ends is at least two characters long, so stripping the first and
     * last character is always safe for the caller.
     */
    private static boolean isNamedEntityReference(String text) {
        return text.startsWith("&") && !text.startsWith("&#") && text.endsWith(";");
    }

    /**
     * Creates a new {@code String} from a region of a character buffer. No interning is done here.
     *
     * @param cbuf source characters
     * @param off index of the first character to copy
     * @param len number of characters to copy
     * @return a new string holding the selected characters
     */
    private String newString(char[] cbuf, int off, int len) {
        return new String(cbuf, off, len);
    }

    /**
     * Simplistic hash function that takes constant time to compute: it samples at most four
     * characters (first, last, and for longer inputs the ones at 1/4 and 1/2 of the length).
     * Hash quality therefore diminishes for long strings, but for XML names it should be good enough.
     * <p>
     * NOTE(review): the formula is presumably mirrored by whatever code looks entries up through
     * {@link #entityNameHash}; do not change it without checking that code.
     *
     * @param ch character buffer
     * @param off index of the first character of the region to hash
     * @param len number of characters in the region
     * @return the hash of the region, {@code 0} for an empty region
     */
    private static int fastHash(char[] ch, int off, int len) {
        if (len == 0) {
            return 0;
        }
        int hash = ch[off]; // hash at beginning
        hash = (hash << 7) + ch[off + len - 1]; // hash at the end
        if (len > 16) {
            hash = (hash << 7) + ch[off + (len / 4)]; // 1/4 from beginning
        }
        if (len > 8) {
            hash = (hash << 7) + ch[off + (len / 2)]; // 1/2 of string size ...
        }
        // The first character is shifted left by at most 3 * 7 = 21 bits. For 7-bit (ASCII) names
        // the result stays non-negative; with full 16-bit chars the int may wrap around, which is
        // harmless as long as the value is only compared for equality.
        return hash;
    }

    /**
     * Default table of named character entities, each mapped to a single replacement character.
     * The names appear to follow the HTML 4 Latin-1, special and symbol entity sets.
     */
    public static final EntityReplacementMap defaultEntityReplacementMap = new EntityReplacementMap(new String[][] {
        // ----------------------------------------------------------------------
        // Latin-1 range (U+00A0 - U+00FF)
        // ----------------------------------------------------------------------

        {"nbsp", "\u00a0"},
        {"iexcl", "\u00a1"},
        {"cent", "\u00a2"},
        {"pound", "\u00a3"},
        {"curren", "\u00a4"},
        {"yen", "\u00a5"},
        {"brvbar", "\u00a6"},
        {"sect", "\u00a7"},
        {"uml", "\u00a8"},
        {"copy", "\u00a9"},
        {"ordf", "\u00aa"},
        {"laquo", "\u00ab"},
        {"not", "\u00ac"},
        {"shy", "\u00ad"},
        {"reg", "\u00ae"},
        {"macr", "\u00af"},
        {"deg", "\u00b0"},
        {"plusmn", "\u00b1"},
        {"sup2", "\u00b2"},
        {"sup3", "\u00b3"},
        {"acute", "\u00b4"},
        {"micro", "\u00b5"},
        {"para", "\u00b6"},
        {"middot", "\u00b7"},
        {"cedil", "\u00b8"},
        {"sup1", "\u00b9"},
        {"ordm", "\u00ba"},
        {"raquo", "\u00bb"},
        {"frac14", "\u00bc"},
        {"frac12", "\u00bd"},
        {"frac34", "\u00be"},
        {"iquest", "\u00bf"},
        {"Agrave", "\u00c0"},
        {"Aacute", "\u00c1"},
        {"Acirc", "\u00c2"},
        {"Atilde", "\u00c3"},
        {"Auml", "\u00c4"},
        {"Aring", "\u00c5"},
        {"AElig", "\u00c6"},
        {"Ccedil", "\u00c7"},
        {"Egrave", "\u00c8"},
        {"Eacute", "\u00c9"},
        {"Ecirc", "\u00ca"},
        {"Euml", "\u00cb"},
        {"Igrave", "\u00cc"},
        {"Iacute", "\u00cd"},
        {"Icirc", "\u00ce"},
        {"Iuml", "\u00cf"},
        {"ETH", "\u00d0"},
        {"Ntilde", "\u00d1"},
        {"Ograve", "\u00d2"},
        {"Oacute", "\u00d3"},
        {"Ocirc", "\u00d4"},
        {"Otilde", "\u00d5"},
        {"Ouml", "\u00d6"},
        {"times", "\u00d7"},
        {"Oslash", "\u00d8"},
        {"Ugrave", "\u00d9"},
        {"Uacute", "\u00da"},
        {"Ucirc", "\u00db"},
        {"Uuml", "\u00dc"},
        {"Yacute", "\u00dd"},
        {"THORN", "\u00de"},
        {"szlig", "\u00df"},
        {"agrave", "\u00e0"},
        {"aacute", "\u00e1"},
        {"acirc", "\u00e2"},
        {"atilde", "\u00e3"},
        {"auml", "\u00e4"},
        {"aring", "\u00e5"},
        {"aelig", "\u00e6"},
        {"ccedil", "\u00e7"},
        {"egrave", "\u00e8"},
        {"eacute", "\u00e9"},
        {"ecirc", "\u00ea"},
        {"euml", "\u00eb"},
        {"igrave", "\u00ec"},
        {"iacute", "\u00ed"},
        {"icirc", "\u00ee"},
        {"iuml", "\u00ef"},
        {"eth", "\u00f0"},
        {"ntilde", "\u00f1"},
        {"ograve", "\u00f2"},
        {"oacute", "\u00f3"},
        {"ocirc", "\u00f4"},
        {"otilde", "\u00f5"},
        {"ouml", "\u00f6"},
        {"divide", "\u00f7"},
        {"oslash", "\u00f8"},
        {"ugrave", "\u00f9"},
        {"uacute", "\u00fa"},
        {"ucirc", "\u00fb"},
        {"uuml", "\u00fc"},
        {"yacute", "\u00fd"},
        {"thorn", "\u00fe"},
        {"yuml", "\u00ff"},

        // ----------------------------------------------------------------------
        // Special entities
        // ----------------------------------------------------------------------

        {"OElig", "\u0152"},
        {"oelig", "\u0153"},
        {"Scaron", "\u0160"},
        {"scaron", "\u0161"},
        {"Yuml", "\u0178"},
        {"circ", "\u02c6"},
        {"tilde", "\u02dc"},
        {"ensp", "\u2002"},
        {"emsp", "\u2003"},
        {"thinsp", "\u2009"},
        {"zwnj", "\u200c"},
        {"zwj", "\u200d"},
        {"lrm", "\u200e"},
        {"rlm", "\u200f"},
        {"ndash", "\u2013"},
        {"mdash", "\u2014"},
        {"lsquo", "\u2018"},
        {"rsquo", "\u2019"},
        {"sbquo", "\u201a"},
        {"ldquo", "\u201c"},
        {"rdquo", "\u201d"},
        {"bdquo", "\u201e"},
        {"dagger", "\u2020"},
        {"Dagger", "\u2021"},
        {"permil", "\u2030"},
        {"lsaquo", "\u2039"},
        {"rsaquo", "\u203a"},
        {"euro", "\u20ac"},

        // ----------------------------------------------------------------------
        // Symbol entities
        // ----------------------------------------------------------------------

        {"fnof", "\u0192"},
        {"Alpha", "\u0391"},
        {"Beta", "\u0392"},
        {"Gamma", "\u0393"},
        {"Delta", "\u0394"},
        {"Epsilon", "\u0395"},
        {"Zeta", "\u0396"},
        {"Eta", "\u0397"},
        {"Theta", "\u0398"},
        {"Iota", "\u0399"},
        {"Kappa", "\u039a"},
        {"Lambda", "\u039b"},
        {"Mu", "\u039c"},
        {"Nu", "\u039d"},
        {"Xi", "\u039e"},
        {"Omicron", "\u039f"},
        {"Pi", "\u03a0"},
        {"Rho", "\u03a1"},
        {"Sigma", "\u03a3"},
        {"Tau", "\u03a4"},
        {"Upsilon", "\u03a5"},
        {"Phi", "\u03a6"},
        {"Chi", "\u03a7"},
        {"Psi", "\u03a8"},
        {"Omega", "\u03a9"},
        {"alpha", "\u03b1"},
        {"beta", "\u03b2"},
        {"gamma", "\u03b3"},
        {"delta", "\u03b4"},
        {"epsilon", "\u03b5"},
        {"zeta", "\u03b6"},
        {"eta", "\u03b7"},
        {"theta", "\u03b8"},
        {"iota", "\u03b9"},
        {"kappa", "\u03ba"},
        {"lambda", "\u03bb"},
        {"mu", "\u03bc"},
        {"nu", "\u03bd"},
        {"xi", "\u03be"},
        {"omicron", "\u03bf"},
        {"pi", "\u03c0"},
        {"rho", "\u03c1"},
        {"sigmaf", "\u03c2"},
        {"sigma", "\u03c3"},
        {"tau", "\u03c4"},
        {"upsilon", "\u03c5"},
        {"phi", "\u03c6"},
        {"chi", "\u03c7"},
        {"psi", "\u03c8"},
        {"omega", "\u03c9"},
        {"thetasym", "\u03d1"},
        {"upsih", "\u03d2"},
        {"piv", "\u03d6"},
        {"bull", "\u2022"},
        {"hellip", "\u2026"},
        {"prime", "\u2032"},
        {"Prime", "\u2033"},
        {"oline", "\u203e"},
        {"frasl", "\u2044"},
        {"weierp", "\u2118"},
        {"image", "\u2111"},
        {"real", "\u211c"},
        {"trade", "\u2122"},
        {"alefsym", "\u2135"},
        {"larr", "\u2190"},
        {"uarr", "\u2191"},
        {"rarr", "\u2192"},
        {"darr", "\u2193"},
        {"harr", "\u2194"},
        {"crarr", "\u21b5"},
        {"lArr", "\u21d0"},
        {"uArr", "\u21d1"},
        {"rArr", "\u21d2"},
        {"dArr", "\u21d3"},
        {"hArr", "\u21d4"},
        {"forall", "\u2200"},
        {"part", "\u2202"},
        {"exist", "\u2203"},
        {"empty", "\u2205"},
        {"nabla", "\u2207"},
        {"isin", "\u2208"},
        {"notin", "\u2209"},
        {"ni", "\u220b"},
        {"prod", "\u220f"},
        {"sum", "\u2211"},
        {"minus", "\u2212"},
        {"lowast", "\u2217"},
        {"radic", "\u221a"},
        {"prop", "\u221d"},
        {"infin", "\u221e"},
        {"ang", "\u2220"},
        {"and", "\u2227"},
        {"or", "\u2228"},
        {"cap", "\u2229"},
        {"cup", "\u222a"},
        {"int", "\u222b"},
        {"there4", "\u2234"},
        {"sim", "\u223c"},
        {"cong", "\u2245"},
        {"asymp", "\u2248"},
        {"ne", "\u2260"},
        {"equiv", "\u2261"},
        {"le", "\u2264"},
        {"ge", "\u2265"},
        {"sub", "\u2282"},
        {"sup", "\u2283"},
        {"nsub", "\u2284"},
        {"sube", "\u2286"},
        {"supe", "\u2287"},
        {"oplus", "\u2295"},
        {"otimes", "\u2297"},
        {"perp", "\u22a5"},
        {"sdot", "\u22c5"},
        {"lceil", "\u2308"},
        {"rceil", "\u2309"},
        {"lfloor", "\u230a"},
        {"rfloor", "\u230b"},
        {"lang", "\u2329"},
        {"rang", "\u232a"},
        {"loz", "\u25ca"},
        {"spades", "\u2660"},
        {"clubs", "\u2663"},
        {"hearts", "\u2665"},
        {"diams", "\u2666"}
    });
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy