All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.mathieudebrito.translator.utils.HTMLEntities Maven / Gradle / Ivy

The newest version!
package com.github.mathieudebrito.translator.utils;

import java.util.Hashtable;

/**
 * Collection of static methods to convert special and extended
 * characters into HTML entities and vice versa.
 *
 * <p>Copyright (c) 2004-2005 Tecnick.com S.r.l (www.tecnick.com),
 * Via Ugo Foscolo n.19 - 09045 Quartu Sant'Elena (CA) - ITALY.<br>
 * Project homepage: http://htmlentities.sourceforge.net<br>
 * License: http://www.gnu.org/copyleft/lesser.html LGPL
 *
 * <p>The translation maps are built once, when the class is loaded, and are
 * only read afterwards.
 *
 * @author Nicola Asuni [www.tecnick.com].
 * @version 1.0.004
 */
public class HTMLEntities {

    /**
     * Translation table for HTML entities: each row is
     * {@code {entity name (String), character code (Integer)}}.
     * Reference: W3C - Character entity references in HTML 4
     * [http://www.w3.org/TR/html401/sgml/entities.html].
     * Note that the table has no rows for {@code &lt;} and {@code &gt;};
     * see {@link #htmlAngleBrackets(String)} for those.
     */
    private static final Object[][] HTML_ENTITIES_TABLE = {
        {"&Aacute;", 193}, {"&aacute;", 225}, {"&Acirc;", 194}, {"&acirc;", 226},
        {"&acute;", 180}, {"&AElig;", 198}, {"&aelig;", 230}, {"&Agrave;", 192},
        {"&agrave;", 224}, {"&alefsym;", 8501}, {"&Alpha;", 913}, {"&alpha;", 945},
        {"&amp;", 38}, {"&and;", 8743}, {"&ang;", 8736}, {"&Aring;", 197},
        {"&aring;", 229}, {"&asymp;", 8776}, {"&Atilde;", 195}, {"&atilde;", 227},
        {"&Auml;", 196}, {"&auml;", 228}, {"&bdquo;", 8222}, {"&Beta;", 914},
        {"&beta;", 946}, {"&brvbar;", 166}, {"&bull;", 8226}, {"&cap;", 8745},
        {"&Ccedil;", 199}, {"&ccedil;", 231}, {"&cedil;", 184}, {"&cent;", 162},
        {"&Chi;", 935}, {"&chi;", 967}, {"&circ;", 710}, {"&clubs;", 9827},
        {"&cong;", 8773}, {"&copy;", 169}, {"&crarr;", 8629}, {"&cup;", 8746},
        {"&curren;", 164}, {"&dagger;", 8224}, {"&Dagger;", 8225}, {"&darr;", 8595},
        {"&dArr;", 8659}, {"&deg;", 176}, {"&Delta;", 916}, {"&delta;", 948},
        {"&diams;", 9830}, {"&divide;", 247}, {"&Eacute;", 201}, {"&eacute;", 233},
        {"&Ecirc;", 202}, {"&ecirc;", 234}, {"&Egrave;", 200}, {"&egrave;", 232},
        {"&empty;", 8709}, {"&emsp;", 8195}, {"&ensp;", 8194}, {"&Epsilon;", 917},
        {"&epsilon;", 949}, {"&equiv;", 8801}, {"&Eta;", 919}, {"&eta;", 951},
        {"&ETH;", 208}, {"&eth;", 240}, {"&Euml;", 203}, {"&euml;", 235},
        {"&euro;", 8364}, {"&exist;", 8707}, {"&fnof;", 402}, {"&forall;", 8704},
        {"&frac12;", 189}, {"&frac14;", 188}, {"&frac34;", 190}, {"&frasl;", 8260},
        {"&Gamma;", 915}, {"&gamma;", 947}, {"&ge;", 8805}, {"&harr;", 8596},
        {"&hArr;", 8660}, {"&hearts;", 9829}, {"&hellip;", 8230}, {"&Iacute;", 205},
        {"&iacute;", 237}, {"&Icirc;", 206}, {"&icirc;", 238}, {"&iexcl;", 161},
        {"&Igrave;", 204}, {"&igrave;", 236}, {"&image;", 8465}, {"&infin;", 8734},
        {"&int;", 8747}, {"&Iota;", 921}, {"&iota;", 953}, {"&iquest;", 191},
        {"&isin;", 8712}, {"&Iuml;", 207}, {"&iuml;", 239}, {"&Kappa;", 922},
        {"&kappa;", 954}, {"&Lambda;", 923}, {"&lambda;", 955}, {"&lang;", 9001},
        {"&laquo;", 171}, {"&larr;", 8592}, {"&lArr;", 8656}, {"&lceil;", 8968},
        {"&ldquo;", 8220}, {"&le;", 8804}, {"&lfloor;", 8970}, {"&lowast;", 8727},
        {"&loz;", 9674}, {"&lrm;", 8206}, {"&lsaquo;", 8249}, {"&lsquo;", 8216},
        {"&macr;", 175}, {"&mdash;", 8212}, {"&micro;", 181}, {"&middot;", 183},
        {"&minus;", 8722}, {"&Mu;", 924}, {"&mu;", 956}, {"&nabla;", 8711},
        {"&nbsp;", 160}, {"&ndash;", 8211}, {"&ne;", 8800}, {"&ni;", 8715},
        {"&not;", 172}, {"&notin;", 8713}, {"&nsub;", 8836}, {"&Ntilde;", 209},
        {"&ntilde;", 241}, {"&Nu;", 925}, {"&nu;", 957}, {"&Oacute;", 211},
        {"&oacute;", 243}, {"&Ocirc;", 212}, {"&ocirc;", 244}, {"&OElig;", 338},
        {"&oelig;", 339}, {"&Ograve;", 210}, {"&ograve;", 242}, {"&oline;", 8254},
        {"&Omega;", 937}, {"&omega;", 969}, {"&Omicron;", 927}, {"&omicron;", 959},
        {"&oplus;", 8853}, {"&or;", 8744}, {"&ordf;", 170}, {"&ordm;", 186},
        {"&Oslash;", 216}, {"&oslash;", 248}, {"&Otilde;", 213}, {"&otilde;", 245},
        {"&otimes;", 8855}, {"&Ouml;", 214}, {"&ouml;", 246}, {"&para;", 182},
        {"&part;", 8706}, {"&permil;", 8240}, {"&perp;", 8869}, {"&Phi;", 934},
        {"&phi;", 966}, {"&Pi;", 928}, {"&pi;", 960}, {"&piv;", 982},
        {"&plusmn;", 177}, {"&pound;", 163}, {"&prime;", 8242}, {"&Prime;", 8243},
        {"&prod;", 8719}, {"&prop;", 8733}, {"&Psi;", 936}, {"&psi;", 968},
        {"&quot;", 34}, {"&radic;", 8730}, {"&rang;", 9002}, {"&raquo;", 187},
        {"&rarr;", 8594}, {"&rArr;", 8658}, {"&rceil;", 8969}, {"&rdquo;", 8221},
        {"&real;", 8476}, {"&reg;", 174}, {"&rfloor;", 8971}, {"&Rho;", 929},
        {"&rho;", 961}, {"&rlm;", 8207}, {"&rsaquo;", 8250}, {"&rsquo;", 8217},
        {"&sbquo;", 8218}, {"&Scaron;", 352}, {"&scaron;", 353}, {"&sdot;", 8901},
        {"&sect;", 167}, {"&shy;", 173}, {"&Sigma;", 931}, {"&sigma;", 963},
        {"&sigmaf;", 962}, {"&sim;", 8764}, {"&spades;", 9824}, {"&sub;", 8834},
        {"&sube;", 8838}, {"&sum;", 8721}, {"&sup1;", 185}, {"&sup2;", 178},
        {"&sup3;", 179}, {"&sup;", 8835}, {"&supe;", 8839}, {"&szlig;", 223},
        {"&Tau;", 932}, {"&tau;", 964}, {"&there4;", 8756}, {"&Theta;", 920},
        {"&theta;", 952}, {"&thetasym;", 977}, {"&thinsp;", 8201}, {"&THORN;", 222},
        {"&thorn;", 254}, {"&tilde;", 732}, {"&times;", 215}, {"&trade;", 8482},
        {"&Uacute;", 218}, {"&uacute;", 250}, {"&uarr;", 8593}, {"&uArr;", 8657},
        {"&Ucirc;", 219}, {"&ucirc;", 251}, {"&Ugrave;", 217}, {"&ugrave;", 249},
        {"&uml;", 168}, {"&upsih;", 978}, {"&Upsilon;", 933}, {"&upsilon;", 965},
        {"&Uuml;", 220}, {"&uuml;", 252}, {"&weierp;", 8472}, {"&Xi;", 926},
        {"&xi;", 958}, {"&Yacute;", 221}, {"&yacute;", 253}, {"&yen;", 165},
        {"&yuml;", 255}, {"&Yuml;", 376}, {"&Zeta;", 918}, {"&zeta;", 950},
        {"&zwj;", 8205}, {"&zwnj;", 8204}
    };

    /**
     * Longest entity (including the leading '&amp;' and trailing ';') that
     * {@link #unhtmlentities(String)} tries to decode. Ten characters covers the
     * longest named entity in the table ({@code &thetasym;}) as well as the
     * largest numeric references ({@code &#1114111;}, {@code &#x10FFFF;}).
     */
    private static final int MAX_ENTITY_LENGTH = 10;

    /** Map from character code to HTML entity name. */
    private static final Hashtable<Integer, String> htmlentities_map =
            new Hashtable<Integer, String>();

    /** Map from HTML entity name to character code. */
    private static final Hashtable<String, Integer> unhtmlentities_map =
            new Hashtable<String, Integer>();

    // Fill both maps at class-load time so that no caller can ever observe a
    // partially populated map (the former lazy isEmpty() check allowed that).
    static {
        for (Object[] row : HTML_ENTITIES_TABLE) {
            String entity = (String) row[0];
            Integer code = (Integer) row[1];
            htmlentities_map.put(code, entity);
            unhtmlentities_map.put(entity, code);
        }
    }

    //==============================================================================
    // METHODS
    //==============================================================================

    /**
     * Public constructor retained for backward compatibility. All methods are
     * static and the translation maps are already built by the static
     * initializer, so constructing an instance does nothing.
     */
    public HTMLEntities() {
        // nothing to do
    }

    /**
     * Get a copy of the HTML entities translation table.
     *
     * @return translation table; each row is {entity name (String), character code (Integer)}
     */
    public static Object[][] getEntitiesTable() {
        // Copy every row so callers cannot modify the internal table.
        Object[][] copy = new Object[HTML_ENTITIES_TABLE.length][];
        for (int i = 0; i < HTML_ENTITIES_TABLE.length; ++i) {
            copy[i] = HTML_ENTITIES_TABLE[i].clone();
        }
        return copy;
    }

    /**
     * Convert special and extended characters into HTML entities.
     *
     * <p>Characters listed in the translation table become named entities, any
     * other non-ASCII code point becomes a decimal numeric reference, and all
     * remaining characters are copied unchanged. The table has no entries for
     * '&lt;' and '&gt;', so those two are left as they are.
     *
     * @param str input string (may be {@code null})
     * @return formatted string, or an empty string if {@code str} is {@code null}
     * @see #unhtmlentities(String)
     */
    public static String htmlentities(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder buf = new StringBuilder(str.length() + 16);
        int i = 0;
        while (i < str.length()) {
            // Work on code points so a surrogate pair yields one numeric reference.
            int codePoint = str.codePointAt(i);
            String entity = htmlentities_map.get(Integer.valueOf(codePoint));
            if (entity != null) {
                buf.append(entity);
            } else if (codePoint > 127) {
                // everything outside 7-bit ASCII is written as a numeric reference
                buf.append("&#").append(codePoint).append(';');
            } else {
                buf.append((char) codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return buf.toString();
    }

    /**
     * Convert HTML entities to their special and extended unicode character
     * equivalents.
     *
     * <p>Named entities from the translation table and decimal
     * ({@code &#233;}) or hexadecimal ({@code &#xE9;}) numeric references are
     * decoded. Anything that looks like an entity but cannot be decoded
     * (unknown name, malformed or out-of-range number) is left untouched.
     *
     * @param str input string (may be {@code null})
     * @return formatted string, or an empty string if {@code str} is {@code null}
     * @see #htmlentities(String)
     */
    public static String unhtmlentities(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder buf = new StringBuilder(str.length());
        int i = 0;
        while (i < str.length()) {
            char ch = str.charAt(i);
            if (ch == '&') {
                int semi = str.indexOf(';', i + 1);
                if (semi != -1 && (semi - i + 1) <= MAX_ENTITY_LENGTH) {
                    int codePoint = decodeEntity(str.substring(i, semi + 1));
                    if (codePoint >= 0) {
                        buf.appendCodePoint(codePoint);
                        i = semi + 1;
                        continue;
                    }
                }
                // Not a decodable entity: keep only the '&' and rescan from the
                // next character, so an entity that starts before the ';' we
                // found (e.g. "R&D&amp;") is still decoded.
            }
            buf.append(ch);
            ++i;
        }
        return buf.toString();
    }

    /**
     * Decode one entity of the form {@code &...;}.
     *
     * @param entity the candidate entity, including the leading '&amp;' and trailing ';'
     * @return the code point it stands for, or -1 if it cannot be decoded
     */
    private static int decodeEntity(String entity) {
        if (entity.charAt(1) != '#') {
            Integer code = unhtmlentities_map.get(entity);
            return (code == null) ? -1 : code.intValue();
        }
        boolean hex = entity.length() > 3
                && (entity.charAt(2) == 'x' || entity.charAt(2) == 'X');
        int radix = hex ? 16 : 10;
        String digits = entity.substring(hex ? 3 : 2, entity.length() - 1);
        // Require a leading digit so that signs ("+39", "-5") are rejected.
        if (digits.length() == 0 || Character.digit(digits.charAt(0), radix) < 0) {
            return -1;
        }
        try {
            int codePoint = Integer.parseInt(digits, radix);
            return Character.isValidCodePoint(codePoint) ? codePoint : -1;
        } catch (NumberFormatException e) {
            return -1; // malformed or overflowing number: leave the text as is
        }
    }

    // methods to convert special characters

    /**
     * Replace single quote characters, and the numeric references
     * {@code &#039;}, {@code &#145;} and {@code &#146;}, with the
     * {@code &rsquo;} HTML entity.
     *
     * @param str the input string
     * @return string with replaced single quotes
     */
    public static String htmlSingleQuotes(String str) {
        str = str.replace("'", "&rsquo;");
        str = str.replace("&#039;", "&rsquo;");
        str = str.replace("&#145;", "&rsquo;");
        str = str.replace("&#146;", "&rsquo;");
        return str;
    }

    /**
     * Replace {@code &rsquo;} HTML entities with the single quote character.
     *
     * @param str the input string
     * @return string with replaced single quotes
     */
    public static String unhtmlSingleQuotes(String str) {
        return str.replace("&rsquo;", "'");
    }

    /**
     * Replace double quote characters, and the numeric references
     * {@code &#147;} and {@code &#148;}, with the {@code &quot;} HTML entity.
     *
     * @param str the input string
     * @return string with replaced double quotes
     */
    public static String htmlDoubleQuotes(String str) {
        str = str.replace("\"", "&quot;");
        str = str.replace("&#147;", "&quot;");
        str = str.replace("&#148;", "&quot;");
        return str;
    }

    /**
     * Replace {@code &quot;} HTML entities with the double quote character.
     *
     * @param str the input string
     * @return string with replaced double quotes
     */
    public static String unhtmlDoubleQuotes(String str) {
        return str.replace("&quot;", "\"");
    }

    /**
     * Replace single and double quotes characters with HTML entities.
     *
     * @param str the input string
     * @return string with replaced quotes
     */
    public static String htmlQuotes(String str) {
        str = htmlDoubleQuotes(str); //convert double quotes
        str = htmlSingleQuotes(str); //convert single quotes
        return str;
    }

    /**
     * Replace single and double quotes HTML entities with equivalent characters.
     *
     * @param str the input string
     * @return string with replaced quotes
     */
    public static String unhtmlQuotes(String str) {
        str = unhtmlDoubleQuotes(str); //convert double quotes
        str = unhtmlSingleQuotes(str); //convert single quotes
        return str;
    }

    /**
     * Replace &lt; &gt; characters with {@code &lt;} {@code &gt;} entities.
     *
     * @param str the input string
     * @return string with replaced characters
     */
    public static String htmlAngleBrackets(String str) {
        str = str.replace("<", "&lt;");
        str = str.replace(">", "&gt;");
        return str;
    }

    /**
     * Replace {@code &lt;} {@code &gt;} entities with &lt; &gt; characters.
     *
     * @param str the input string
     * @return string with replaced entities
     */
    public static String unhtmlAngleBrackets(String str) {
        str = str.replace("&lt;", "<");
        str = str.replace("&gt;", ">");
        return str;
    }

    /**
     * Replace &amp; characters with {@code &amp;} HTML entities.
     *
     * @param str the input string
     * @return string with replaced characters
     */
    public static String htmlAmpersand(String str) {
        return str.replace("&", "&amp;");
    }

    /**
     * Replace {@code &amp;} HTML entities with &amp; characters.
     *
     * @param str the input string
     * @return string with replaced entities
     */
    public static String unhtmlAmpersand(String str) {
        return str.replace("&amp;", "&");
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy