All downloads are free. The search and download functionality uses the official Maven repository.

com.github.rjeschke.txtmark.HTML Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2011 René Jeschke 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.rjeschke.txtmark;

import java.util.HashMap;
import java.util.HashSet;

/**
 * HTML utility class.
 * 
 * @author René Jeschke 
 */
class HTML
{
    /** List of valid HTML/XML entity names. */
    private final static String[] ENTITY_NAMES = {
        "Â", "â", "´", "Æ", "æ", "À", "à", "ℵ", 
        "Α", "α", "&", "∧", "∠", "'", "Å", "å", 
        "≈", "Ã", "ã", "Ä", "ä", "„", "Β", "β", 
        "¦", "•", "∩", "Ç", "ç", "¸", "¢", "Χ", 
        "χ", "ˆ", "♣", "≅", "©", "↵", "∪", "¤", 
        "‡", "†", "⇓", "↓", "°", "Δ", "δ", "♦", 
        "÷", "É", "é", "Ê", "ê", "È", "è", "∅", 
        " ", " ", "Ε", "ε", "≡", "Η", "η", "Ð", 
        "ð", "Ë", "ë", "€", "∃", "ƒ", "∀", "½", 
        "¼", "¾", "⁄", "Γ", "γ", "≥", ">", "⇔", 
        "↔", "♥", "…", "Í", "í", "Î", "î", "¡", 
        "Ì", "ì", "ℑ", "∞", "∫", "Ι", "ι", "¿", 
        "∈", "Ï", "ï", "Κ", "κ", "Λ", "λ", "⟨", 
        "«", "⇐", "←", "⌈", "“", "≤", "⌊", "∗", 
        "◊", "‎", "‹", "‘", "<", "¯", "—", "µ", 
        "·", "−", "Μ", "μ", "∇", " ", "–", "≠", 
        "∋", "¬", "∉", "⊄", "Ñ", "ñ", "Ν", "ν", 
        "Ó", "ó", "Ô", "ô", "Œ", "œ", "Ò", "ò", 
        "‾", "Ω", "ω", "Ο", "ο", "⊕", "∨", "ª", 
        "º", "Ø", "ø", "Õ", "õ", "⊗", "Ö", "ö", 
        "¶", "∂", "‰", "⊥", "Φ", "φ", "Π", "π", 
        "ϖ", "±", "£", "″", "′", "∏", "∝", "Ψ", 
        "ψ", """, "√", "⟩", "»", "⇒", "→", "⌉", 
        "”", "ℜ", "®", "⌋", "Ρ", "ρ", "‏", "›", 
        "’", "‚", "Š", "š", "⋅", "§", "­", "Σ", 
        "σ", "ς", "∼", "♠", "⊂", "⊆", "∑", "⊃", 
        "¹", "²", "³", "⊇", "ß", "Τ", "τ", "∴", 
        "Θ", "θ", "ϑ", " ", "þ", "˜", "×", "™", 
        "Ú", "ú", "⇑", "↑", "Û", "û", "Ù", "ù", 
        "¨", "ϒ", "Υ", "υ", "Ü", "ü", "℘", "Ξ", 
        "ξ", "Ý", "ý", "¥", "Ÿ", "ÿ", "Ζ", "ζ", 
        "‍", "‌"
    };
    /** Characters corresponding to ENTITY_NAMES. */
    private final static char[] ENTITY_CHARS = {
        '\u00C2', '\u00E2', '\u00B4', '\u00C6', '\u00E6', '\u00C0', '\u00E0', '\u2135', 
        '\u0391', '\u03B1', '\u0026', '\u2227', '\u2220', '\'', '\u00C5', '\u00E5', 
        '\u2248', '\u00C3', '\u00E3', '\u00C4', '\u00E4', '\u201E', '\u0392', '\u03B2', 
        '\u00A6', '\u2022', '\u2229', '\u00C7', '\u00E7', '\u00B8', '\u00A2', '\u03A7', 
        '\u03C7', '\u02C6', '\u2663', '\u2245', '\u00A9', '\u21B5', '\u222A', '\u00A4', 
        '\u2021', '\u2020', '\u21D3', '\u2193', '\u00B0', '\u0394', '\u03B4', '\u2666', 
        '\u00F7', '\u00C9', '\u00E9', '\u00CA', '\u00EA', '\u00C8', '\u00E8', '\u2205', 
        '\u2003', '\u2002', '\u0395', '\u03B5', '\u2261', '\u0397', '\u03B7', '\u00D0', 
        '\u00F0', '\u00CB', '\u00EB', '\u20AC', '\u2203', '\u0192', '\u2200', '\u00BD', 
        '\u00BC', '\u00BE', '\u2044', '\u0393', '\u03B3', '\u2265', '\u003E', '\u21D4', 
        '\u2194', '\u2665', '\u2026', '\u00CD', '\u00ED', '\u00CE', '\u00EE', '\u00A1', 
        '\u00CC', '\u00EC', '\u2111', '\u221E', '\u222B', '\u0399', '\u03B9', '\u00BF', 
        '\u2208', '\u00CF', '\u00EF', '\u039A', '\u03BA', '\u039B', '\u03BB', '\u2329', 
        '\u00AB', '\u21D0', '\u2190', '\u2308', '\u201C', '\u2264', '\u230A', '\u2217', 
        '\u25CA', '\u200E', '\u2039', '\u2018', '\u003C', '\u00AF', '\u2014', '\u00B5', 
        '\u00B7', '\u2212', '\u039C', '\u03BC', '\u2207', '\u00A0', '\u2013', '\u2260', 
        '\u220B', '\u00AC', '\u2209', '\u2284', '\u00D1', '\u00F1', '\u039D', '\u03BD', 
        '\u00D3', '\u00F3', '\u00D4', '\u00F4', '\u0152', '\u0153', '\u00D2', '\u00F2', 
        '\u203E', '\u03A9', '\u03C9', '\u039F', '\u03BF', '\u2295', '\u2228', '\u00AA', 
        '\u00BA', '\u00D8', '\u00F8', '\u00D5', '\u00F5', '\u2297', '\u00D6', '\u00F6', 
        '\u00B6', '\u2202', '\u2030', '\u22A5', '\u03A6', '\u03C6', '\u03A0', '\u03C0', 
        '\u03D6', '\u00B1', '\u00A3', '\u2033', '\u2032', '\u220F', '\u221D', '\u03A8', 
        '\u03C8', '\u0022', '\u221A', '\u232A', '\u00BB', '\u21D2', '\u2192', '\u2309', 
        '\u201D', '\u211C', '\u00AE', '\u230B', '\u03A1', '\u03C1', '\u200F', '\u203A', 
        '\u2019', '\u201A', '\u0160', '\u0161', '\u22C5', '\u00A7', '\u00AD', '\u03A3', 
        '\u03C3', '\u03C2', '\u223C', '\u2660', '\u2282', '\u2286', '\u2211', '\u2283', 
        '\u00B9', '\u00B2', '\u00B3', '\u2287', '\u00DF', '\u03A4', '\u03C4', '\u2234', 
        '\u0398', '\u03B8', '\u03D1', '\u00DE', '\u00FE', '\u02DC', '\u00D7', '\u2122', 
        '\u00DA', '\u00FA', '\u21D1', '\u2191', '\u00DB', '\u00FB', '\u00D9', '\u00F9', 
        '\u00A8', '\u03D2', '\u03A5', '\u03C5', '\u00DC', '\u00FC', '\u2118', '\u039E', 
        '\u03BE', '\u00DD', '\u00FD', '\u00A5', '\u0178', '\u00FF', '\u0396', '\u03B6', 
        '\u200D', '\u200C'
    };
    /** Valid markdown link prefixes for auto links. */
    private final static String[] LINK_PREFIXES = {
        "http", "https", 
        "ftp", "ftps"
    };

    /** HTML block level elements. */
    private final static HTMLElement[] BLOCK_ELEMENTS = {
        HTMLElement.address,
        HTMLElement.blockquote,
        HTMLElement.del, HTMLElement.div, HTMLElement.dl,
        HTMLElement.fieldset, HTMLElement.form,
        HTMLElement.h1, HTMLElement.h2, HTMLElement.h3, HTMLElement.h4, HTMLElement.h5, HTMLElement.h6, HTMLElement.hr,
        HTMLElement.ins,
        HTMLElement.noscript,
        HTMLElement.ol,
        HTMLElement.p, HTMLElement.pre,
        HTMLElement.table,
        HTMLElement.ul
    };

    /** HTML unsafe elements. */
    private final static HTMLElement[] UNSAFE_ELEMENTS = {
        HTMLElement.applet,
        HTMLElement.head,
        HTMLElement.html,
        HTMLElement.body,
        HTMLElement.frame,
        HTMLElement.frameset,
        HTMLElement.iframe,
        HTMLElement.script,
        HTMLElement.object,
    };
    
    /** Character to entity encoding map. */
    private final static HashMap encodeMap = new HashMap();
    /** Entity to character decoding map. */
    private final static HashMap decodeMap = new HashMap();
    /** Set of valid HTML tags. */
    private final static HashSet HTML_ELEMENTS = new HashSet();
    /** Set of unsafe HTML tags. */
    private final static HashSet HTML_UNSAFE = new HashSet();
    /** Set of HTML block level tags. */
    private final static HashSet HTML_BLOCK_ELEMENTS = new HashSet();
    /** Set of valid markdown link prefixes. */
    private final static HashSet LINK_PREFIX = new HashSet();

    static
    {
        for(final HTMLElement h : HTMLElement.values())
        {
            HTML_ELEMENTS.add(h.toString());
        }
        for(final HTMLElement h : UNSAFE_ELEMENTS)
        {
            HTML_UNSAFE.add(h.toString());
        }
        for(final HTMLElement h : BLOCK_ELEMENTS)
        {
            HTML_BLOCK_ELEMENTS.add(h.toString());
        }
        for(int i = 0; i < ENTITY_NAMES.length; i++)
        {
            encodeMap.put(ENTITY_CHARS[i], ENTITY_NAMES[i]);
            decodeMap.put(ENTITY_NAMES[i], ENTITY_CHARS[i]);
        }
        for(int i = 0; i < LINK_PREFIXES.length; i++)
        {
            LINK_PREFIX.add(LINK_PREFIXES[i]);
        }
    }

    /** Constructor. (Singleton) */
    private HTML()
    {
        //
    }
    
    /**
     * @param value String to check.
     * @return Returns true if the given String is a link prefix.
     */
    public final static boolean isLinkPrefix(final String value)
    {
        return LINK_PREFIX.contains(value);
    }
    
    /**
     * @param value String to check.
     * @return Returns true if the given String is an entity.
     */
    public final static boolean isEntity(final String value)
    {
        return decodeMap.containsKey(value);
    }
    
    /**
     * @param value String to check.
     * @return Returns true if the given String is a HTML tag.
     */
    public final static boolean isHtmlElement(final String value)
    {
        return HTML_ELEMENTS.contains(value);
    }
    
    /**
     * @param value String to check.
     * @return Returns true if the given String is a HTML block level tag.
     */
    public final static boolean isHtmlBlockElement(final String value)
    {
        return HTML_BLOCK_ELEMENTS.contains(value);
    }

    /**
     * @param value String to check.
     * @return Returns true if the given String is an unsafe HTML tag.
     */
    public final static boolean isUnsafeHtmlElement(final String value)
    {
        return HTML_UNSAFE.contains(value);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy