All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.anotheria.util.CharacterEntityCoder Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
package net.anotheria.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

/**
 * Performs html and xml encoding.
 *
 * @author another
 * @version $Id: $Id
 */
public class CharacterEntityCoder {
	/**
	 * Character-to-entity table: each key is a raw character, each value the entity string
	 * that stands for it. Filled once in the static initializer.
	 */
	private static final Map<Character, String> ENTITIES;
	/**
	 * HTML decoding table. Each element is a two-element array: index 0 holds the raw
	 * character (as a String, only its first char is used), index 1 the entity string.
	 * A list is used so the entries are processed in insertion order.
	 */
	private static final List<String[]> HTML_ENTITIES;
	/**
	 * HTML-to-XML table: each key is the entity string searched for in the input, each
	 * value the string it is replaced with.
	 */
	private static final Map<String, String> HTML_2_XML_ENTITIES;
	
	
	// Fills the three lookup tables of this class: ENTITIES, HTML_ENTITIES and HTML_2_XML_ENTITIES.
	// NOTE(review): many literals below look damaged by an HTML/charset round trip - string
	// literals are split across lines, some char literals are empty or hold two characters, and
	// in most rows the entity string appears already decoded to the character it names.
	// Presumably the right-hand strings were entity references such as "&amp;" - TODO confirm
	// against the upstream source before relying on any of these values.
	static {
		// Table 1: raw character -> entity string.
		ENTITIES = new HashMap<>();
		ENTITIES.put('\n', "
");
		ENTITIES.put('\r', "
");
		ENTITIES.put('\t', "	");
		ENTITIES.put('&', "&");
		ENTITIES.put('\"', """);
		ENTITIES.put('ß', "§");
		ENTITIES.put('fl', "ß");
		ENTITIES.put('<', "<");
		ENTITIES.put('>', ">");
		ENTITIES.put('Ä', "Æ");
		ENTITIES.put('¿', "À");
		ENTITIES.put('¡', "Á");
		ENTITIES.put('¬', "Â");
		ENTITIES.put('√', "Ã");
		ENTITIES.put('ƒ', "Ä");
		ENTITIES.put('≈', "Å");
		ENTITIES.put('∆', "Æ");
		ENTITIES.put('«', "Ç");
		ENTITIES.put('»', "È");
		ENTITIES.put('…', "É");
		ENTITIES.put(' ', "Ê");
		ENTITIES.put('À', "Ë");
		ENTITIES.put('Ã', "Ì");
		ENTITIES.put('Õ', "Í");
		ENTITIES.put('Œ', "Î");
		ENTITIES.put('œ', "Ï");
		ENTITIES.put('—', "Ñ");
		ENTITIES.put('“', "Ò");
		ENTITIES.put('”', "Ó");
		ENTITIES.put('‘', "Ô");
		ENTITIES.put('’', "Õ");
		ENTITIES.put('÷', "Ö");
		ENTITIES.put('Ÿ', "Ù");
		ENTITIES.put('⁄', "Ú");
		ENTITIES.put('€', "Û");
		ENTITIES.put('‹', "Ü");
		ENTITIES.put('›', "Ý");
		ENTITIES.put('‡', "à");
		ENTITIES.put('·', "á");
		ENTITIES.put('‚', "â");
		ENTITIES.put('„', "ã");
		ENTITIES.put('‰', "ä");
		ENTITIES.put('Ê', "æ");
		ENTITIES.put('Á', "ç");
		ENTITIES.put('Ë', "è");
		ENTITIES.put('È', "é");
		ENTITIES.put('Í', "ê");
		ENTITIES.put('Î', "ë");
		ENTITIES.put('Ï', "ì");
		ENTITIES.put('Ì', "í");
		ENTITIES.put('Ó', "î");
		ENTITIES.put('Ô', "ï");
		ENTITIES.put('', "ð");
		ENTITIES.put('Ò', "ñ");
		ENTITIES.put('Ú', "ò");
		ENTITIES.put('Û', "ó");
		ENTITIES.put('Ù', "ô");
		ENTITIES.put('ı', "õ");
		ENTITIES.put('ˆ', "ö");
		ENTITIES.put('˘', "ù");
		ENTITIES.put('˙', "ú");
		ENTITIES.put('˚', "û");
		ENTITIES.put('¸', "ü");
		ENTITIES.put('˝', "ý");
		ENTITIES.put('ˇ', "ÿ");
		ENTITIES.put('-', "—");
		
		// Table 2: {raw character, entity string} pairs, kept in insertion order.
		HTML_ENTITIES = new ArrayList<>();
		HTML_ENTITIES.add(new String[] {"&", "&"});
		HTML_ENTITIES.add(new String[] {"\"", """});
		HTML_ENTITIES.add(new String[] {"ß", "§"});
		HTML_ENTITIES.add(new String[] {"fl", "ß"});
		HTML_ENTITIES.add(new String[] {"<", "<"});
		HTML_ENTITIES.add(new String[] {">", ">"});
		HTML_ENTITIES.add(new String[] {"Ä", "€"});
		HTML_ENTITIES.add(new String[] {"¿", "À"});
		HTML_ENTITIES.add(new String[] {"¡", "Á"});
		HTML_ENTITIES.add(new String[] {"¬", "Â"});
		HTML_ENTITIES.add(new String[] {"√", "Ã"});
		HTML_ENTITIES.add(new String[] {"ƒ", "Ä"});
		HTML_ENTITIES.add(new String[] {"≈", "Å"});
		HTML_ENTITIES.add(new String[] {"∆", "Æ"});
		HTML_ENTITIES.add(new String[] {"«", "Ç"});
		HTML_ENTITIES.add(new String[] {"»", "È"});
		HTML_ENTITIES.add(new String[] {"…", "É"});
		HTML_ENTITIES.add(new String[] {" ", "Ê"});
		HTML_ENTITIES.add(new String[] {"À", "Ë"});
		HTML_ENTITIES.add(new String[] {"Ã", "Ì"});
		HTML_ENTITIES.add(new String[] {"Õ", "Í"});
		HTML_ENTITIES.add(new String[] {"Œ", "Î"});
		HTML_ENTITIES.add(new String[] {"œ", "Ï"});
		HTML_ENTITIES.add(new String[] {"—", "Ñ"});
		HTML_ENTITIES.add(new String[] {"“", "Ò"});
		HTML_ENTITIES.add(new String[] {"”", "Ó"});
		HTML_ENTITIES.add(new String[] {"‘", "Ô"});
		HTML_ENTITIES.add(new String[] {"’", "Õ"});
		HTML_ENTITIES.add(new String[] {"÷", "Ö"});
		HTML_ENTITIES.add(new String[] {"Ÿ", "Ù"});
		HTML_ENTITIES.add(new String[] {"⁄", "Ú"});
		HTML_ENTITIES.add(new String[] {"€", "Û"});
		HTML_ENTITIES.add(new String[] {"‹", "Ü"});
		HTML_ENTITIES.add(new String[] {"›", "Ý"});
		HTML_ENTITIES.add(new String[] {"‡", "à"});
		HTML_ENTITIES.add(new String[] {"·", "á"});
		HTML_ENTITIES.add(new String[] {"‚", "â"});
		HTML_ENTITIES.add(new String[] {"„", "ã"});
		HTML_ENTITIES.add(new String[] {"‰", "ä"});
		HTML_ENTITIES.add(new String[] {"Ê", "æ"});
		HTML_ENTITIES.add(new String[] {"Á", "ç"});
		HTML_ENTITIES.add(new String[] {"Ë", "è"});
		HTML_ENTITIES.add(new String[] {"È", "é"});
		HTML_ENTITIES.add(new String[] {"Í", "ê"});
		HTML_ENTITIES.add(new String[] {"Î", "ë"});
		HTML_ENTITIES.add(new String[] {"Ï", "ì"});
		HTML_ENTITIES.add(new String[] {"Ì", "í"});
		HTML_ENTITIES.add(new String[] {"Ó", "î"});
		HTML_ENTITIES.add(new String[] {"Ô", "ï"});
		HTML_ENTITIES.add(new String[] {"", "ð"});
		HTML_ENTITIES.add(new String[] {"Ò", "ñ"});
		HTML_ENTITIES.add(new String[] {"Ú", "ò"});
		HTML_ENTITIES.add(new String[] {"Û", "ó"});
		HTML_ENTITIES.add(new String[] {"Ù", "ô"});
		HTML_ENTITIES.add(new String[] {"ı", "õ"});
		HTML_ENTITIES.add(new String[] {"ˆ", "ö"});
		HTML_ENTITIES.add(new String[] {"˘", "ù"});
		HTML_ENTITIES.add(new String[] {"˙", "ú"});
		HTML_ENTITIES.add(new String[] {"˚", "û"});
		HTML_ENTITIES.add(new String[] {"¸", "ü"});
		HTML_ENTITIES.add(new String[] {"˝", "ý"});
		HTML_ENTITIES.add(new String[] {"ˇ", "ÿ"});
		HTML_ENTITIES.add(new String[] {"-", "—"});

		// Table 3: string searched for in HTML input -> string it is replaced with.
		// NOTE(review): several keys occur more than once below; with a HashMap the later put wins.
		HTML_2_XML_ENTITIES = new HashMap<>();
		HTML_2_XML_ENTITIES.put("&",   "&");
		HTML_2_XML_ENTITIES.put("ö",  "ö");
		HTML_2_XML_ENTITIES.put("ä",  "ä");
		HTML_2_XML_ENTITIES.put("ü",  "ü");
		HTML_2_XML_ENTITIES.put("Ü",  "Ü");
		HTML_2_XML_ENTITIES.put("Ö",  "Ö");
		HTML_2_XML_ENTITIES.put("Ä",  "Ä");
		HTML_2_XML_ENTITIES.put(""",  """);
		HTML_2_XML_ENTITIES.put("§",  "§");
		HTML_2_XML_ENTITIES.put("ß", "ß");
		HTML_2_XML_ENTITIES.put("<",    "<");
		HTML_2_XML_ENTITIES.put(">",    ">");
		HTML_2_XML_ENTITIES.put("€",  "Æ");
		HTML_2_XML_ENTITIES.put("Á",		"‡");
		HTML_2_XML_ENTITIES.put("·",		" ");
		HTML_2_XML_ENTITIES.put("à",	"…");
		HTML_2_XML_ENTITIES.put("â",	"ƒ");
		HTML_2_XML_ENTITIES.put("À",	"À");
		HTML_2_XML_ENTITIES.put("Á",	"Á");
		HTML_2_XML_ENTITIES.put("Â",	"Â");
		HTML_2_XML_ENTITIES.put("Ã",	"Ã");
		HTML_2_XML_ENTITIES.put("Ä",	"Ä");
		HTML_2_XML_ENTITIES.put("Å",	"Å");
		HTML_2_XML_ENTITIES.put("Æ",	"Æ");
		HTML_2_XML_ENTITIES.put("Ç",	"Ç");
		HTML_2_XML_ENTITIES.put("È",	"È");
		HTML_2_XML_ENTITIES.put("É",	"É");
		HTML_2_XML_ENTITIES.put("Ê",	"Ê");
		HTML_2_XML_ENTITIES.put("Ë",	"Ë");
		HTML_2_XML_ENTITIES.put("Ì",	"Ì");
		HTML_2_XML_ENTITIES.put("Í",	"Í");
		HTML_2_XML_ENTITIES.put("Î",	"Î");
		HTML_2_XML_ENTITIES.put("Ï",	"Ï");
		HTML_2_XML_ENTITIES.put("Ñ",	"Ñ");
		HTML_2_XML_ENTITIES.put("Ò",	"Ò");
		HTML_2_XML_ENTITIES.put("Ó",	"Ó");
		HTML_2_XML_ENTITIES.put("Ô",	"Ô");
		HTML_2_XML_ENTITIES.put("Õ",	"Õ");
		HTML_2_XML_ENTITIES.put("Ö",	"Ö");
		HTML_2_XML_ENTITIES.put("Ù",	"Ù");
		HTML_2_XML_ENTITIES.put("Ú",	"Ú");
		HTML_2_XML_ENTITIES.put("Û",	"Û");
		HTML_2_XML_ENTITIES.put("Ü",	"Ü");
		HTML_2_XML_ENTITIES.put("Ý",	"Ý");
		HTML_2_XML_ENTITIES.put("à",	"à");
		HTML_2_XML_ENTITIES.put("á",	"á");
		HTML_2_XML_ENTITIES.put("â",	"â");
		HTML_2_XML_ENTITIES.put("ã",	"ã");
		HTML_2_XML_ENTITIES.put("æ",	"æ");
		HTML_2_XML_ENTITIES.put("ç",	"ç");
		HTML_2_XML_ENTITIES.put("è",	"è");
		HTML_2_XML_ENTITIES.put("é",	"é");
		HTML_2_XML_ENTITIES.put("ê",	"ê");
		HTML_2_XML_ENTITIES.put("ë",	"ë");
		HTML_2_XML_ENTITIES.put("ì",	"ì");
		HTML_2_XML_ENTITIES.put("í",	"í");
		HTML_2_XML_ENTITIES.put("î",	"î");
		HTML_2_XML_ENTITIES.put("ï",	"ï");
		HTML_2_XML_ENTITIES.put("ð",	"ð");
		HTML_2_XML_ENTITIES.put("ñ",	"ñ");
		HTML_2_XML_ENTITIES.put("ò",	"ò");
		HTML_2_XML_ENTITIES.put("ó",	"ó");
		HTML_2_XML_ENTITIES.put("ô",	"ô");
		HTML_2_XML_ENTITIES.put("õ",	"õ");
		HTML_2_XML_ENTITIES.put("ù",	"ù");
		HTML_2_XML_ENTITIES.put("ú",	"ú");
		HTML_2_XML_ENTITIES.put("û",	"û");
		HTML_2_XML_ENTITIES.put("ü",	"ü");
		HTML_2_XML_ENTITIES.put("ý",	"ý");
		HTML_2_XML_ENTITIES.put("ÿ",	"ÿ");
		HTML_2_XML_ENTITIES.put("—",	"—");
	}

	/**
	 * returns encoded String for XML use
	 *
	 * @param s a {@link java.lang.String} object.
	 * @return a {@link java.lang.String} object.
	 */
	public static String encodeString(String s) {
		StringBuilder result = new StringBuilder("");
		for (int i=0; i characterStringEntry : ENTITIES.entrySet()) {
			String n = characterStringEntry.getValue();
			int j = n.length();
			String s1 = "";
			int lastI = 0;
			for (int i = 0; i < s.length() - j; i++) {
				if (s.substring(i, i + j).equals(n)) {
					s1 = s1 + s.substring(lastI, i) + characterStringEntry.getKey();
					lastI = i + j;
					i = lastI;
				}
			}
			if (!s1.isEmpty())
				s = s1;
		}

		return s;
	}

	/**
	 * Returns the given HTML text with every entity listed in {@code HTML_ENTITIES} replaced
	 * by the character it stands for.
	 *
	 * The entries are applied one after another in list order, each over the result of the
	 * previous one.
	 *
	 * @param s the HTML text to decode; {@code null} and the empty string are returned as-is
	 * @return the decoded text
	 */
	public static String decodeHtmlString(String s) {
		if (s == null || s.isEmpty()) {
			return s;
		}
		for (String[] sa : HTML_ENTITIES) {
			String rawCharacter = sa[0];
			String entity = sa[1];
			if (rawCharacter.isEmpty() || entity.isEmpty()) {
				// Nothing usable to substitute, and an empty search string would match everywhere.
				continue;
			}
			// Only the first char of the raw-character string is used as the replacement.
			// String.replace handles every non-overlapping occurrence: a match at the very end of
			// the input, directly adjacent matches, and the text after the last match are all kept.
			s = s.replace(entity, String.valueOf(rawCharacter.charAt(0)));
		}
		return s;
	}

	/**
	 * 

htm2xml.

* * @param s a {@link java.lang.String} object. * @return a {@link java.lang.String} object. */ public static String htm2xml(String s){ for (Map.Entry stringStringEntry : HTML_2_XML_ENTITIES.entrySet()) { String xmlString = stringStringEntry.getValue(); int j = stringStringEntry.getKey().length(); int lastI = 0; String replaced = ""; for (int i = 0; i < s.length() - j; i++) { if (s.substring(i, i + j).equals(stringStringEntry.getKey())) { //System.out.println("ON Replace "+s.substring(i,i+j)+" -> "+s.substring(lastI, i)+xmlString); replaced = replaced + s.substring(lastI, i) + xmlString; //System.out.println("###:"+replaced); lastI = i + j; i = lastI - 1; } } replaced = replaced + s.substring(lastI, s.length()); if (!replaced.isEmpty()) s = replaced; } return s; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy