// net.anotheria.util.CharacterEntityCoder Maven / Gradle / Ivy
package net.anotheria.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
/**
* Encodes and decodes HTML and XML character entities.
*
* @author another
* @version $Id: $Id
*/
public class CharacterEntityCoder {
// The type arguments are required: the methods of this class assign map values
// directly to String and iterate the list as String[] elements, which does not
// compile against raw Map / List.

/** Replacement text for single characters, keyed by the character (keys are added as char literals). */
private static final Map<Character, String> ENTITIES;
/** Two-element arrays: element 0 holds the character (as a string), element 1 the text that stands for it. */
private static final List<String[]> HTML_ENTITIES;
/** Text to look for, mapped to the text that {@code htm2xml} substitutes for it. */
private static final Map<String, String> HTML_2_XML_ENTITIES;
// Fills the three lookup tables once, when the class is initialized.
//  - ENTITIES: keyed by a char literal, the value is the text that stands in for that character.
//  - HTML_ENTITIES: two-element String arrays; decodeHtmlString takes the first char of
//    element 0 as the character and element 1 as the text to find.
//  - HTML_2_XML_ENTITIES: text to find mapped to the text htm2xml puts in its place.
// NOTE(review): many literals below are not valid Java as written (strings that span two
// lines, char literals holding zero or two characters, identical find and replace texts).
// Presumably the original entity spellings were decoded somewhere upstream - TODO restore
// the literals from the authoritative source before relying on these tables.
// NOTE(review): several HTML_2_XML_ENTITIES keys appear twice; with a HashMap the later put
// replaces the earlier one - confirm which mapping is intended.
static {
ENTITIES = new HashMap<>();
ENTITIES.put('\n', "
");
ENTITIES.put('\r', "
");
ENTITIES.put('\t', " ");
ENTITIES.put('&', "&");
ENTITIES.put('\"', """);
ENTITIES.put('ß', "§");
ENTITIES.put('fl', "ß");
ENTITIES.put('<', "<");
ENTITIES.put('>', ">");
ENTITIES.put('Ä', "Æ");
ENTITIES.put('¿', "À");
ENTITIES.put('¡', "Á");
ENTITIES.put('¬', "Â");
ENTITIES.put('√', "Ã");
ENTITIES.put('ƒ', "Ä");
ENTITIES.put('≈', "Å");
ENTITIES.put('∆', "Æ");
ENTITIES.put('«', "Ç");
ENTITIES.put('»', "È");
ENTITIES.put('…', "É");
ENTITIES.put(' ', "Ê");
ENTITIES.put('À', "Ë");
ENTITIES.put('Ã', "Ì");
ENTITIES.put('Õ', "Í");
ENTITIES.put('Œ', "Î");
ENTITIES.put('œ', "Ï");
ENTITIES.put('—', "Ñ");
ENTITIES.put('“', "Ò");
ENTITIES.put('”', "Ó");
ENTITIES.put('‘', "Ô");
ENTITIES.put('’', "Õ");
ENTITIES.put('÷', "Ö");
ENTITIES.put('Ÿ', "Ù");
ENTITIES.put('⁄', "Ú");
ENTITIES.put('€', "Û");
ENTITIES.put('‹', "Ü");
ENTITIES.put('›', "Ý");
ENTITIES.put('‡', "à");
ENTITIES.put('·', "á");
ENTITIES.put('‚', "â");
ENTITIES.put('„', "ã");
ENTITIES.put('‰', "ä");
ENTITIES.put('Ê', "æ");
ENTITIES.put('Á', "ç");
ENTITIES.put('Ë', "è");
ENTITIES.put('È', "é");
ENTITIES.put('Í', "ê");
ENTITIES.put('Î', "ë");
ENTITIES.put('Ï', "ì");
ENTITIES.put('Ì', "í");
ENTITIES.put('Ó', "î");
ENTITIES.put('Ô', "ï");
ENTITIES.put('', "ð");
ENTITIES.put('Ò', "ñ");
ENTITIES.put('Ú', "ò");
ENTITIES.put('Û', "ó");
ENTITIES.put('Ù', "ô");
ENTITIES.put('ı', "õ");
ENTITIES.put('ˆ', "ö");
ENTITIES.put('˘', "ù");
ENTITIES.put('˙', "ú");
ENTITIES.put('˚', "û");
ENTITIES.put('¸', "ü");
ENTITIES.put('˝', "ý");
ENTITIES.put('ˇ', "ÿ");
ENTITIES.put('-', "—");
HTML_ENTITIES = new ArrayList<>();
HTML_ENTITIES.add(new String[] {"&", "&"});
HTML_ENTITIES.add(new String[] {"\"", """});
HTML_ENTITIES.add(new String[] {"ß", "§"});
HTML_ENTITIES.add(new String[] {"fl", "ß"});
HTML_ENTITIES.add(new String[] {"<", "<"});
HTML_ENTITIES.add(new String[] {">", ">"});
HTML_ENTITIES.add(new String[] {"Ä", "€"});
HTML_ENTITIES.add(new String[] {"¿", "À"});
HTML_ENTITIES.add(new String[] {"¡", "Á"});
HTML_ENTITIES.add(new String[] {"¬", "Â"});
HTML_ENTITIES.add(new String[] {"√", "Ã"});
HTML_ENTITIES.add(new String[] {"ƒ", "Ä"});
HTML_ENTITIES.add(new String[] {"≈", "Å"});
HTML_ENTITIES.add(new String[] {"∆", "Æ"});
HTML_ENTITIES.add(new String[] {"«", "Ç"});
HTML_ENTITIES.add(new String[] {"»", "È"});
HTML_ENTITIES.add(new String[] {"…", "É"});
HTML_ENTITIES.add(new String[] {" ", "Ê"});
HTML_ENTITIES.add(new String[] {"À", "Ë"});
HTML_ENTITIES.add(new String[] {"Ã", "Ì"});
HTML_ENTITIES.add(new String[] {"Õ", "Í"});
HTML_ENTITIES.add(new String[] {"Œ", "Î"});
HTML_ENTITIES.add(new String[] {"œ", "Ï"});
HTML_ENTITIES.add(new String[] {"—", "Ñ"});
HTML_ENTITIES.add(new String[] {"“", "Ò"});
HTML_ENTITIES.add(new String[] {"”", "Ó"});
HTML_ENTITIES.add(new String[] {"‘", "Ô"});
HTML_ENTITIES.add(new String[] {"’", "Õ"});
HTML_ENTITIES.add(new String[] {"÷", "Ö"});
HTML_ENTITIES.add(new String[] {"Ÿ", "Ù"});
HTML_ENTITIES.add(new String[] {"⁄", "Ú"});
HTML_ENTITIES.add(new String[] {"€", "Û"});
HTML_ENTITIES.add(new String[] {"‹", "Ü"});
HTML_ENTITIES.add(new String[] {"›", "Ý"});
HTML_ENTITIES.add(new String[] {"‡", "à"});
HTML_ENTITIES.add(new String[] {"·", "á"});
HTML_ENTITIES.add(new String[] {"‚", "â"});
HTML_ENTITIES.add(new String[] {"„", "ã"});
HTML_ENTITIES.add(new String[] {"‰", "ä"});
HTML_ENTITIES.add(new String[] {"Ê", "æ"});
HTML_ENTITIES.add(new String[] {"Á", "ç"});
HTML_ENTITIES.add(new String[] {"Ë", "è"});
HTML_ENTITIES.add(new String[] {"È", "é"});
HTML_ENTITIES.add(new String[] {"Í", "ê"});
HTML_ENTITIES.add(new String[] {"Î", "ë"});
HTML_ENTITIES.add(new String[] {"Ï", "ì"});
HTML_ENTITIES.add(new String[] {"Ì", "í"});
HTML_ENTITIES.add(new String[] {"Ó", "î"});
HTML_ENTITIES.add(new String[] {"Ô", "ï"});
HTML_ENTITIES.add(new String[] {"", "ð"});
HTML_ENTITIES.add(new String[] {"Ò", "ñ"});
HTML_ENTITIES.add(new String[] {"Ú", "ò"});
HTML_ENTITIES.add(new String[] {"Û", "ó"});
HTML_ENTITIES.add(new String[] {"Ù", "ô"});
HTML_ENTITIES.add(new String[] {"ı", "õ"});
HTML_ENTITIES.add(new String[] {"ˆ", "ö"});
HTML_ENTITIES.add(new String[] {"˘", "ù"});
HTML_ENTITIES.add(new String[] {"˙", "ú"});
HTML_ENTITIES.add(new String[] {"˚", "û"});
HTML_ENTITIES.add(new String[] {"¸", "ü"});
HTML_ENTITIES.add(new String[] {"˝", "ý"});
HTML_ENTITIES.add(new String[] {"ˇ", "ÿ"});
HTML_ENTITIES.add(new String[] {"-", "—"});
HTML_2_XML_ENTITIES = new HashMap<>();
HTML_2_XML_ENTITIES.put("&", "&");
HTML_2_XML_ENTITIES.put("ö", "ö");
HTML_2_XML_ENTITIES.put("ä", "ä");
HTML_2_XML_ENTITIES.put("ü", "ü");
HTML_2_XML_ENTITIES.put("Ü", "Ü");
HTML_2_XML_ENTITIES.put("Ö", "Ö");
HTML_2_XML_ENTITIES.put("Ä", "Ä");
HTML_2_XML_ENTITIES.put(""", """);
HTML_2_XML_ENTITIES.put("§", "§");
HTML_2_XML_ENTITIES.put("ß", "ß");
HTML_2_XML_ENTITIES.put("<", "<");
HTML_2_XML_ENTITIES.put(">", ">");
HTML_2_XML_ENTITIES.put("€", "Æ");
HTML_2_XML_ENTITIES.put("Á", "");
HTML_2_XML_ENTITIES.put("·", " ");
HTML_2_XML_ENTITIES.put("à", "
");
HTML_2_XML_ENTITIES.put("â", "");
HTML_2_XML_ENTITIES.put("À", "À");
HTML_2_XML_ENTITIES.put("Á", "Á");
HTML_2_XML_ENTITIES.put("Â", "Â");
HTML_2_XML_ENTITIES.put("Ã", "Ã");
HTML_2_XML_ENTITIES.put("Ä", "Ä");
HTML_2_XML_ENTITIES.put("Å", "Å");
HTML_2_XML_ENTITIES.put("Æ", "Æ");
HTML_2_XML_ENTITIES.put("Ç", "Ç");
HTML_2_XML_ENTITIES.put("È", "È");
HTML_2_XML_ENTITIES.put("É", "É");
HTML_2_XML_ENTITIES.put("Ê", "Ê");
HTML_2_XML_ENTITIES.put("Ë", "Ë");
HTML_2_XML_ENTITIES.put("Ì", "Ì");
HTML_2_XML_ENTITIES.put("Í", "Í");
HTML_2_XML_ENTITIES.put("Î", "Î");
HTML_2_XML_ENTITIES.put("Ï", "Ï");
HTML_2_XML_ENTITIES.put("Ñ", "Ñ");
HTML_2_XML_ENTITIES.put("Ò", "Ò");
HTML_2_XML_ENTITIES.put("Ó", "Ó");
HTML_2_XML_ENTITIES.put("Ô", "Ô");
HTML_2_XML_ENTITIES.put("Õ", "Õ");
HTML_2_XML_ENTITIES.put("Ö", "Ö");
HTML_2_XML_ENTITIES.put("Ù", "Ù");
HTML_2_XML_ENTITIES.put("Ú", "Ú");
HTML_2_XML_ENTITIES.put("Û", "Û");
HTML_2_XML_ENTITIES.put("Ü", "Ü");
HTML_2_XML_ENTITIES.put("Ý", "Ý");
HTML_2_XML_ENTITIES.put("à", "à");
HTML_2_XML_ENTITIES.put("á", "á");
HTML_2_XML_ENTITIES.put("â", "â");
HTML_2_XML_ENTITIES.put("ã", "ã");
HTML_2_XML_ENTITIES.put("æ", "æ");
HTML_2_XML_ENTITIES.put("ç", "ç");
HTML_2_XML_ENTITIES.put("è", "è");
HTML_2_XML_ENTITIES.put("é", "é");
HTML_2_XML_ENTITIES.put("ê", "ê");
HTML_2_XML_ENTITIES.put("ë", "ë");
HTML_2_XML_ENTITIES.put("ì", "ì");
HTML_2_XML_ENTITIES.put("í", "í");
HTML_2_XML_ENTITIES.put("î", "î");
HTML_2_XML_ENTITIES.put("ï", "ï");
HTML_2_XML_ENTITIES.put("ð", "ð");
HTML_2_XML_ENTITIES.put("ñ", "ñ");
HTML_2_XML_ENTITIES.put("ò", "ò");
HTML_2_XML_ENTITIES.put("ó", "ó");
HTML_2_XML_ENTITIES.put("ô", "ô");
HTML_2_XML_ENTITIES.put("õ", "õ");
HTML_2_XML_ENTITIES.put("ù", "ù");
HTML_2_XML_ENTITIES.put("ú", "ú");
HTML_2_XML_ENTITIES.put("û", "û");
HTML_2_XML_ENTITIES.put("ü", "ü");
HTML_2_XML_ENTITIES.put("ý", "ý");
HTML_2_XML_ENTITIES.put("ÿ", "ÿ");
HTML_2_XML_ENTITIES.put("—", "—");
}
/**
* returns encoded String for XML use
*
* @param s a {@link java.lang.String} object.
* @return a {@link java.lang.String} object.
*/
public static String encodeString(String s) {
StringBuilder result = new StringBuilder("");
for (int i=0; i characterStringEntry : ENTITIES.entrySet()) {
String n = characterStringEntry.getValue();
int j = n.length();
String s1 = "";
int lastI = 0;
for (int i = 0; i < s.length() - j; i++) {
if (s.substring(i, i + j).equals(n)) {
s1 = s1 + s.substring(lastI, i) + characterStringEntry.getKey();
lastI = i + j;
i = lastI;
}
}
if (!s1.isEmpty())
s = s1;
}
return s;
}
/**
 * Returns the given HTML string with every entity text listed in {@code HTML_ENTITIES}
 * replaced by its character.
 *
 * <p>The input is scanned once from left to right: text after the last entity is kept,
 * an entity at the very end of the input is decoded, the character directly after an
 * entity is not skipped, and decoded output is never decoded a second time.
 *
 * @param s the text to decode; may be {@code null}
 * @return the decoded text, or {@code null} when {@code s} is {@code null}
 */
public static String decodeHtmlString(String s) {
    if (s == null || s.isEmpty()) {
        return s;
    }
    StringBuilder decoded = new StringBuilder(s.length());
    int pos = 0;
    while (pos < s.length()) {
        String[] hit = null;
        for (String[] pair : HTML_ENTITIES) {
            // Skip malformed pairs: an empty character text has no first char to emit,
            // and an empty entity text would match everywhere without consuming input.
            if (pair[0].isEmpty() || pair[1].isEmpty()) {
                continue;
            }
            if (s.startsWith(pair[1], pos)) {
                hit = pair;
                break;
            }
        }
        if (hit == null) {
            decoded.append(s.charAt(pos));
            pos++;
        } else {
            decoded.append(hit[0].charAt(0));
            pos += hit[1].length();
        }
    }
    return decoded.toString();
}
/**
 * Converts HTML entity spellings to their XML counterparts: every occurrence of a key of
 * {@code HTML_2_XML_ENTITIES} is replaced by the value mapped to it.
 *
 * <p>The input is scanned once from left to right, so a key at the very end of the input
 * is converted and substituted text is never scanned again.
 *
 * @param s the text to convert; may be {@code null}
 * @return the converted text, or {@code null} when {@code s} is {@code null}
 */
public static String htm2xml(String s) {
    if (s == null || s.isEmpty()) {
        return s;
    }
    StringBuilder converted = new StringBuilder(s.length());
    int pos = 0;
    while (pos < s.length()) {
        Map.Entry<String, String> hit = null;
        for (Map.Entry<String, String> candidate : HTML_2_XML_ENTITIES.entrySet()) {
            String htmlText = candidate.getKey();
            // An empty key would match at every position and never advance the scan.
            if (!htmlText.isEmpty() && s.startsWith(htmlText, pos)) {
                hit = candidate;
                break;
            }
        }
        if (hit == null) {
            converted.append(s.charAt(pos));
            pos++;
        } else {
            converted.append(hit.getValue());
            pos += hit.getKey().length();
        }
    }
    return converted.toString();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy