All downloads are free. Search and download functionality uses the official Maven repository.

commonMain.HtmlEntity.kt Maven / Gradle / Ivy

There is a newer version: 2.12.3
Show newest version
/*
 * Copyright 2019-2021 Mamoe Technologies and contributors.
 *
 * 此源代码的使用受 GNU AFFERO GENERAL PUBLIC LICENSE version 3 许可证的约束, 可以在以下链接找到该许可证.
 * Use of this source code is governed by the GNU AGPLv3 license that can be found through the following link.
 *
 * https://github.com/mamoe/mirai/blob/dev/LICENSE
 */

package net.mamoe.mirai.utils

/**
 * Matches one HTML character reference: an ampersand, an optional `#`,
 * one or more ASCII letters or digits, and a terminating semicolon.
 *
 * Capture group 1 holds the text between the `&` and the `;`
 * (including the leading `#` when present).
 */
@Suppress("RegExpRedundantEscape")
private val STR_TO_CHAR_PATTERN = Regex("""\&(\#?[A-Za-z0-9]+?)\;""")

/**
 * Replaces HTML character references in this string with the characters they denote.
 *
 * Every substring matched by [STR_TO_CHAR_PATTERN] is handled as follows:
 *  - if the whole match (for example `&amp;`) is a key of [STR_TO_CHAR_MAPPINGS],
 *    the mapped string is substituted;
 *  - otherwise, if it is a decimal (`&#65;`) or hexadecimal (`&#x41;` / `&#X41;`)
 *    numeric reference denoting a code point in `0..0x10FFFF`, that code point is
 *    substituted; code points above `0xFFFF` are emitted as a UTF-16 surrogate pair;
 *  - otherwise the matched text is left unchanged.
 *
 * @return a new string with all recognised references decoded.
 */
public fun String.decodeHtmlEscape(): String = replace(STR_TO_CHAR_PATTERN) { match ->
    // Named entities are keyed by the complete matched text, including '&' and ';'.
    STR_TO_CHAR_MAPPINGS[match.value]?.let { return@replace it }

    // Group 1 is not optional in the pattern, so it is always present on a match.
    val reference = match.groupValues[1]
    if (reference.length > 1 && reference[0] == '#') {
        val codePoint = if (reference[1] == 'x' || reference[1] == 'X') {
            // Hexadecimal form; "#x" with no digits yields null and falls through.
            reference.substring(2).toIntOrNull(16)
        } else {
            reference.substring(1).toIntOrNull()
        }
        if (codePoint != null) {
            codePointToStringOrNull(codePoint)?.let { return@replace it }
        }
    }

    // Unknown name, malformed number, or out-of-range code point: keep the input text.
    match.value
}

/**
 * Encodes [codePoint] as a UTF-16 string, or returns `null` when it lies outside `0..0x10FFFF`.
 */
private fun codePointToStringOrNull(codePoint: Int): String? = when (codePoint) {
    in 0..0xFFFF -> codePoint.toChar().toString()
    in 0x10000..0x10FFFF -> {
        // Supplementary plane: split into a high/low surrogate pair.
        val offset = codePoint - 0x10000
        buildString(2) {
            append((0xD800 + (offset shr 10)).toChar())
            append((0xDC00 + (offset and 0x3FF)).toChar())
        }
    }
    else -> null
}


/**
 * Lookup table from a complete named character reference (including the leading `&`
 * and the trailing `;`, e.g. `&amp;`) to the string it decodes to.
 *
 * The table is built on first access. Keys must be spelled as entity references,
 * not as the decoded characters, because lookups are performed with the raw
 * `&name;` text found in the input.
 */
private val STR_TO_CHAR_MAPPINGS: Map<String, String> by lazy {
    // region Generated entity table
    val result = HashMap<String, String>(225)
    // Quotation entities, added alongside the generated table below.
    result["&quot;"] = "\u0022"
    result["&apos;"] = "\u0027"
    result["&amp;"] = "\u0026"
    result["&lt;"] = "\u003c"
    result["&gt;"] = "\u003e"
    result["&nbsp;"] = "\u00a0"
    result["&iexcl;"] = "\u00a1"
    result["&cent;"] = "\u00a2"
    result["&pound;"] = "\u00a3"
    result["&curren;"] = "\u00a4"
    result["&yen;"] = "\u00a5"
    result["&brvbar;"] = "\u00a6"
    result["&sect;"] = "\u00a7"
    result["&uml;"] = "\u00a8"
    result["&copy;"] = "\u00a9"
    result["&ordf;"] = "\u00aa"
    result["&laquo;"] = "\u00ab"
    result["&not;"] = "\u00ac"
    result["&shy;"] = "\u00ad"
    result["&reg;"] = "\u00ae"
    result["&macr;"] = "\u00af"
    result["&deg;"] = "\u00b0"
    result["&plusmn;"] = "\u00b1"
    result["&sup2;"] = "\u00b2"
    result["&sup3;"] = "\u00b3"
    result["&acute;"] = "\u00b4"
    result["&micro;"] = "\u00b5"
    result["&para;"] = "\u00b6"
    result["&middot;"] = "\u00b7"
    result["&cedil;"] = "\u00b8"
    result["&sup1;"] = "\u00b9"
    result["&ordm;"] = "\u00ba"
    result["&raquo;"] = "\u00bb"
    result["&frac14;"] = "\u00bc"
    result["&frac12;"] = "\u00bd"
    result["&frac34;"] = "\u00be"
    result["&iquest;"] = "\u00bf"
    result["&Agrave;"] = "\u00c0"
    result["&Aacute;"] = "\u00c1"
    result["&Acirc;"] = "\u00c2"
    result["&Atilde;"] = "\u00c3"
    result["&Auml;"] = "\u00c4"
    result["&Aring;"] = "\u00c5"
    result["&AElig;"] = "\u00c6"
    result["&Ccedil;"] = "\u00c7"
    result["&Egrave;"] = "\u00c8"
    result["&Eacute;"] = "\u00c9"
    result["&Ecirc;"] = "\u00ca"
    result["&Euml;"] = "\u00cb"
    result["&Igrave;"] = "\u00cc"
    result["&Iacute;"] = "\u00cd"
    result["&Icirc;"] = "\u00ce"
    result["&Iuml;"] = "\u00cf"
    result["&ETH;"] = "\u00d0"
    result["&Ntilde;"] = "\u00d1"
    result["&Ograve;"] = "\u00d2"
    result["&Oacute;"] = "\u00d3"
    result["&Ocirc;"] = "\u00d4"
    result["&Otilde;"] = "\u00d5"
    result["&Ouml;"] = "\u00d6"
    result["&times;"] = "\u00d7"
    result["&Oslash;"] = "\u00d8"
    result["&Ugrave;"] = "\u00d9"
    result["&Uacute;"] = "\u00da"
    result["&Ucirc;"] = "\u00db"
    result["&Uuml;"] = "\u00dc"
    result["&Yacute;"] = "\u00dd"
    result["&THORN;"] = "\u00de"
    result["&szlig;"] = "\u00df"
    result["&agrave;"] = "\u00e0"
    result["&aacute;"] = "\u00e1"
    result["&acirc;"] = "\u00e2"
    result["&atilde;"] = "\u00e3"
    result["&auml;"] = "\u00e4"
    result["&aring;"] = "\u00e5"
    result["&aelig;"] = "\u00e6"
    result["&ccedil;"] = "\u00e7"
    result["&egrave;"] = "\u00e8"
    result["&eacute;"] = "\u00e9"
    result["&ecirc;"] = "\u00ea"
    result["&euml;"] = "\u00eb"
    result["&igrave;"] = "\u00ec"
    result["&iacute;"] = "\u00ed"
    result["&icirc;"] = "\u00ee"
    result["&iuml;"] = "\u00ef"
    result["&eth;"] = "\u00f0"
    result["&ntilde;"] = "\u00f1"
    result["&ograve;"] = "\u00f2"
    result["&oacute;"] = "\u00f3"
    result["&ocirc;"] = "\u00f4"
    result["&otilde;"] = "\u00f5"
    result["&ouml;"] = "\u00f6"
    result["&divide;"] = "\u00f7"
    result["&oslash;"] = "\u00f8"
    result["&ugrave;"] = "\u00f9"
    result["&uacute;"] = "\u00fa"
    result["&ucirc;"] = "\u00fb"
    result["&uuml;"] = "\u00fc"
    result["&yacute;"] = "\u00fd"
    result["&thorn;"] = "\u00fe"
    result["&yuml;"] = "\u00ff"
    result["&fnof;"] = "\u0192"
    result["&Alpha;"] = "\u0391"
    result["&Beta;"] = "\u0392"
    result["&Gamma;"] = "\u0393"
    result["&Delta;"] = "\u0394"
    result["&Epsilon;"] = "\u0395"
    result["&Zeta;"] = "\u0396"
    result["&Eta;"] = "\u0397"
    result["&Theta;"] = "\u0398"
    result["&Iota;"] = "\u0399"
    result["&Kappa;"] = "\u039a"
    result["&Lambda;"] = "\u039b"
    result["&Mu;"] = "\u039c"
    result["&Nu;"] = "\u039d"
    result["&Xi;"] = "\u039e"
    result["&Omicron;"] = "\u039f"
    result["&Pi;"] = "\u03a0"
    result["&Rho;"] = "\u03a1"
    result["&Sigma;"] = "\u03a3"
    result["&Tau;"] = "\u03a4"
    result["&Upsilon;"] = "\u03a5"
    result["&Phi;"] = "\u03a6"
    result["&Chi;"] = "\u03a7"
    result["&Psi;"] = "\u03a8"
    result["&Omega;"] = "\u03a9"
    result["&alpha;"] = "\u03b1"
    result["&beta;"] = "\u03b2"
    result["&gamma;"] = "\u03b3"
    result["&delta;"] = "\u03b4"
    result["&epsilon;"] = "\u03b5"
    result["&zeta;"] = "\u03b6"
    result["&eta;"] = "\u03b7"
    result["&theta;"] = "\u03b8"
    result["&iota;"] = "\u03b9"
    result["&kappa;"] = "\u03ba"
    result["&lambda;"] = "\u03bb"
    result["&mu;"] = "\u03bc"
    result["&nu;"] = "\u03bd"
    result["&xi;"] = "\u03be"
    result["&omicron;"] = "\u03bf"
    result["&pi;"] = "\u03c0"
    result["&rho;"] = "\u03c1"
    result["&sigmaf;"] = "\u03c2"
    result["&sigma;"] = "\u03c3"
    result["&tau;"] = "\u03c4"
    result["&upsilon;"] = "\u03c5"
    result["&phi;"] = "\u03c6"
    result["&chi;"] = "\u03c7"
    result["&psi;"] = "\u03c8"
    result["&omega;"] = "\u03c9"
    result["&thetasym;"] = "\u03d1"
    result["&upsih;"] = "\u03d2"
    result["&piv;"] = "\u03d6"
    result["&bull;"] = "\u2022"
    result["&hellip;"] = "\u2026"
    result["&prime;"] = "\u2032"
    result["&Prime;"] = "\u2033"
    result["&oline;"] = "\u203e"
    result["&frasl;"] = "\u2044"
    result["&weierp;"] = "\u2118"
    result["&image;"] = "\u2111"
    result["&real;"] = "\u211c"
    result["&trade;"] = "\u2122"
    result["&alefsym;"] = "\u2135"
    result["&larr;"] = "\u2190"
    result["&uarr;"] = "\u2191"
    result["&rarr;"] = "\u2192"
    result["&darr;"] = "\u2193"
    result["&harr;"] = "\u2194"
    result["&crarr;"] = "\u21b5"
    result["&lArr;"] = "\u21d0"
    result["&uArr;"] = "\u21d1"
    result["&rArr;"] = "\u21d2"
    result["&dArr;"] = "\u21d3"
    result["&hArr;"] = "\u21d4"
    result["&forall;"] = "\u2200"
    result["&part;"] = "\u2202"
    result["&exist;"] = "\u2203"
    result["&empty;"] = "\u2205"
    result["&nabla;"] = "\u2207"
    result["&isin;"] = "\u2208"
    result["&notin;"] = "\u2209"
    result["&ni;"] = "\u220b"
    result["&prod;"] = "\u220f"
    result["&sum;"] = "\u2211"
    result["&minus;"] = "\u2212"
    result["&lowast;"] = "\u2217"
    result["&radic;"] = "\u221a"
    result["&prop;"] = "\u221d"
    result["&infin;"] = "\u221e"
    result["&ang;"] = "\u2220"
    result["&and;"] = "\u2227"
    result["&or;"] = "\u2228"
    result["&cap;"] = "\u2229"
    result["&cup;"] = "\u222a"
    result["&int;"] = "\u222b"
    result["&there4;"] = "\u2234"
    result["&sim;"] = "\u223c"
    result["&cong;"] = "\u2245"
    result["&asymp;"] = "\u2248"
    result["&ne;"] = "\u2260"
    result["&equiv;"] = "\u2261"
    result["&le;"] = "\u2264"
    result["&ge;"] = "\u2265"
    result["&sub;"] = "\u2282"
    result["&sup;"] = "\u2283"
    result["&nsub;"] = "\u2284"
    result["&sube;"] = "\u2286"
    result["&supe;"] = "\u2287"
    result["&oplus;"] = "\u2295"
    result["&otimes;"] = "\u2297"
    result["&perp;"] = "\u22a5"
    result["&sdot;"] = "\u22c5"
    result["&lceil;"] = "\u2308"
    result["&rceil;"] = "\u2309"
    result["&lfloor;"] = "\u230a"
    result["&rfloor;"] = "\u230b"
    // The original code points for lang/rang (U+2329 / U+232A) are kept unchanged.
    result["&lang;"] = "\u2329"
    result["&rang;"] = "\u232a"
    result["&loz;"] = "\u25ca"
    result["&spades;"] = "\u2660"
    result["&clubs;"] = "\u2663"
    result["&hearts;"] = "\u2665"
    result["&diams;"] = "\u2666"
    // endregion
    result
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy