All downloads are free. Search and download functionality uses the official Maven repository.

com.knowgate.xhtml.HtmlUtil Maven / Gradle / Ivy

The newest version!
package com.knowgate.xhtml;

/**
 * © Copyright 2016 the original author.
 * This file is licensed under the Apache License version 2.0.
 * You may not use this file except in compliance with the license.
 * You may obtain a copy of the License at:
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.
 */

public class HtmlUtil {

    /**
     * Names of the supported HTML character entities, written without the
     * leading ampersand and with the trailing semicolon.
     * The entity at index i is represented by the character at index i of
     * {@link #aChars}; both arrays must always have the same length and order.
     * NOTE(review): "ordf;" is listed twice (rows 2 and 9). The second entry is
     * kept so that array length and indices stay unchanged; confirm whether
     * another entity (for example "aelig;") was intended there.
     */
    static final String[] aEnts = {
        "amp;", "lt;", "gt;", "quot;", "iexcl;", "curren;", "yen;", "brvbar;", "sect;",
        "uml;", "copy;", "ordf;", "laquo;", "raquo;", "euro;", "pound;", "shy;", "reg;",
        "macr;", "deg;", "plusmn;", "sup1;", "sup2;", "sup3;", "acute;", "micro;", "para;",
        "middot;", "cedil;", "ordm;", "iquest;", "ntilde;", "Ntilde;", "aacute;", "eacute;", "iacute;",
        "oacute;", "uacute;", "uuml;", "Aacute;", "Agrave;", "Auml;", "Acirc;", "Aring;", "Eacute;",
        "Egrave;", "Euml;", "Ecirc;", "Iacute;", "Igrave;", "Iuml;", "Icirc;", "Oacute;", "Ograve;",
        "Ouml;", "Ocirc;", "Uacute;", "Ugrave;", "Uuml;", "Ucirc;", "frac12;", "frac34;", "frac14;",
        "Ccedil;", "ccedil;", "eth;", "cent;", "THORN;", "thorn;", "ETH;", "times;", "divide;",
        "AElig;", "ordf;", "hellip;", "bull;", "ldquo;", "rdquo;", "ndash;", "mdash;", "oline;",
        "Alpha;", "Beta;", "Gamma;", "Delta;", "Epsilon;", "Lambda;", "Sigma;", "Pi;", "Psi;", "Omega;",
        "alpha;", "beta;", "gamma;", "delta;", "epsilon;", "lambda;", "sigma;", "pi;", "zeta;", "omega;",
        "forall;", "part;", "exist;", "empty;", "isin;", "notin;", "sum;", "infin;", "minus;",
        "loz;", "spades;", "clubs;", "hearts;", "diams;", "nbsp;"
    };

    /**
     * Characters represented by the entities in {@link #aEnts}, in the same
     * order and laid out row for row like that table.
     * Every non-ASCII character is written as a Unicode escape so that the
     * table does not depend on the encoding used to compile this file and the
     * invisible characters (soft hyphen, no-break space) cannot be lost in an editor.
     */
    static final char[] aChars = {
        // amp, lt, gt, quot (double quote), iexcl, curren, yen, brvbar (broken bar), sect
        '&', '<', '>', '"', '\u00A1', '\u00A4', '\u00A5', '\u00A6', '\u00A7',
        // uml, copy, ordf, laquo, raquo, euro, pound, shy (soft hyphen), reg
        '\u00A8', '\u00A9', '\u00AA', '\u00AB', '\u00BB', '\u20AC', '\u00A3', '\u00AD', '\u00AE',
        // macr, deg, plusmn, sup1, sup2, sup3, acute, micro, para
        '\u00AF', '\u00B0', '\u00B1', '\u00B9', '\u00B2', '\u00B3', '\u00B4', '\u00B5', '\u00B6',
        // middot, cedil, ordm, iquest, ntilde, Ntilde, aacute, eacute, iacute
        '\u00B7', '\u00B8', '\u00BA', '\u00BF', '\u00F1', '\u00D1', '\u00E1', '\u00E9', '\u00ED',
        // oacute, uacute, uuml, Aacute, Agrave, Auml, Acirc, Aring, Eacute
        '\u00F3', '\u00FA', '\u00FC', '\u00C1', '\u00C0', '\u00C4', '\u00C2', '\u00C5', '\u00C9',
        // Egrave, Euml, Ecirc, Iacute, Igrave, Iuml, Icirc, Oacute, Ograve
        '\u00C8', '\u00CB', '\u00CA', '\u00CD', '\u00CC', '\u00CF', '\u00CE', '\u00D3', '\u00D2',
        // Ouml, Ocirc, Uacute, Ugrave, Uuml, Ucirc, frac12, frac34, frac14
        '\u00D6', '\u00D4', '\u00DA', '\u00D9', '\u00DC', '\u00DB', '\u00BD', '\u00BE', '\u00BC',
        // Ccedil, ccedil, eth, cent, THORN, thorn, ETH, times, divide
        '\u00C7', '\u00E7', '\u00F0', '\u00A2', '\u00DE', '\u00FE', '\u00D0', '\u00D7', '\u00F7',
        // AElig, ordf (duplicate entry), hellip, bull, ldquo, rdquo, ndash, mdash, oline
        '\u00C6', '\u00AA', '\u2026', '\u2022', '\u201C', '\u201D', '\u2013', '\u2014', '\u203E',
        // Alpha, Beta, Gamma, Delta, Epsilon, Lambda, Sigma, Pi, Psi, Omega
        '\u0391', '\u0392', '\u0393', '\u0394', '\u0395', '\u039B', '\u03A3', '\u03A0', '\u03A8', '\u03A9',
        // alpha, beta, gamma, delta, epsilon, lambda, sigma, pi, zeta, omega
        '\u03B1', '\u03B2', '\u03B3', '\u03B4', '\u03B5', '\u03BB', '\u03C3', '\u03C0', '\u03B6', '\u03C9',
        // forall, part, exist, empty, isin, notin, sum, infin, minus
        '\u2200', '\u2202', '\u2203', '\u2205', '\u2208', '\u2209', '\u2211', '\u221E', '\u2212',
        // loz, spades, clubs, hearts, diams, nbsp (no-break space)
        '\u25CA', '\u2660', '\u2663', '\u2665', '\u2666', '\u00A0'
    };

    /**
     * Replace HTML character references with the characters they represent.
     * Decimal ({@code &#65;}) and hexadecimal ({@code &#x41;}) numeric
     * references are decoded, as are the named entities listed in
     * {@link #aEnts}. Anything that is not a well-formed, known reference
     * (unknown name, missing semicolon, non-numeric digits, code point out of
     * range) is copied to the output unchanged.
     * @param text String to decode, may be {@code null}
     * @return Input string with HTML entities replaced by characters.
     * If text is {@code null} then an empty String "" is returned.
     */
    public static String HTMLDencode(String text) {
        if (text == null) {
            return "";
        }

        final int len = text.length();
        final StringBuilder results = new StringBuilder(len);

        int i = 0;
        while (i < len) {
            final char c = text.charAt(i);
            if (c == '&') {
                final int semicolon = text.indexOf(';', i + 1);
                // Require at least one character between '&' and ';'.
                if (semicolon > i + 1 && appendDecodedEntity(text, i + 1, semicolon, results)) {
                    i = semicolon + 1;
                    continue;
                }
            }
            // Plain character, or an ampersand that does not start a known reference.
            results.append(c);
            i++;
        }

        return results.toString();
    } // HTMLDencode

    /**
     * Decode the reference whose body occupies text[start, semicolon) and
     * append the resulting character to out.
     * @return true if a character was appended, false if the reference is
     * malformed or unknown (in which case out is left untouched)
     */
    private static boolean appendDecodedEntity(String text, int start, int semicolon, StringBuilder out) {
        if (text.charAt(start) == '#') {
            int digits = start + 1;
            int radix = 10;
            if (digits < semicolon && (text.charAt(digits) == 'x' || text.charAt(digits) == 'X')) {
                radix = 16;
                digits++;
            }
            // Reject empty digit runs and signs, which Integer.parseInt would otherwise accept.
            if (digits >= semicolon || Character.digit(text.charAt(digits), radix) < 0) {
                return false;
            }
            try {
                final int codePoint = Integer.parseInt(text.substring(digits, semicolon), radix);
                if (!Character.isValidCodePoint(codePoint)) {
                    return false;
                }
                // appendCodePoint emits a surrogate pair for code points above 0xFFFF
                // instead of truncating them to a single char.
                out.appendCodePoint(codePoint);
                return true;
            } catch (NumberFormatException ignored) {
                // Not a number, or too large for an int: treat as literal text.
                return false;
            }
        }

        final int index = findEntityIndex(text.substring(start, semicolon + 1));
        if (index < 0) {
            return false;
        }
        out.append(aChars[index]);
        return true;
    }

    /**
     * Find the position of an entity name (including its trailing semicolon)
     * in {@link #aEnts}.
     * @return index of the first matching entry, or -1 if the name is not listed
     */
    private static int findEntityIndex(String nameWithSemicolon) {
        for (int f = 0; f < aEnts.length; f++) {
            if (aEnts[f].equals(nameWithSemicolon)) {
                return f;
            }
        }
        return -1;
    }

  /**
   * 

Return text encoded as HTML.

* @param text String to encode * @return HTML-encoded text. If text is null then an empty String "" is returned. */ public static String HTMLEncode(String text) { if (text == null) return ""; char c; final int len = text.length(); final int cln = aChars.length; StringBuilder results = new StringBuilder(len*2); for (int i = 0; i < len; ++i) { c = text.charAt(i); for (int j=0; j




© 2015 - 2024 Weber Informatics LLC | Privacy Policy