// xdev.util.HTMLUtils Maven / Gradle / Ivy
/*
* XDEV Application Framework - XDEV Application Framework
* Copyright © 2003 XDEV Software (https://xdev.software)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package xdev.util;
import java.awt.Font;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import xdev.io.IOUtils;
/**
 * <p>
 * The {@code HTMLUtils} class provides utility methods for HTML handling:
 * escaping text for display in HTML, building inline font style declarations
 * and reducing HTML markup to plain text.
 * </p>
 *
 * @since 2.0
 *
 * @author XDEV Software
 */
public final class HTMLUtils
{
	private HTMLUtils()
	{
	}

	/**
	 * Maps a Unicode code point to the named HTML character reference
	 * (including the leading {@code &} and trailing {@code ;}) that
	 * {@link #toHTML(String)} emits for it.
	 */
	private final static Map<Integer, String> htmlSigns;
	static
	{
		htmlSigns = new HashMap<Integer, String>();

		// Characters with a special meaning in markup
		htmlSigns.put(0x22,"&quot;");
		htmlSigns.put(0x26,"&amp;");
		htmlSigns.put(0x3c,"&lt;");
		htmlSigns.put(0x3e,"&gt;");

		// ISO 8859-1 (Latin-1) symbols
		htmlSigns.put(0xa0,"&nbsp;");
		htmlSigns.put(0xa1,"&iexcl;");
		htmlSigns.put(0xa2,"&cent;");
		htmlSigns.put(0xa3,"&pound;");
		htmlSigns.put(0xa4,"&curren;");
		htmlSigns.put(0xa5,"&yen;");
		htmlSigns.put(0xa6,"&brvbar;");
		htmlSigns.put(0xa7,"&sect;");
		htmlSigns.put(0xa8,"&uml;");
		htmlSigns.put(0xa9,"&copy;");
		htmlSigns.put(0xaa,"&ordf;");
		htmlSigns.put(0xab,"&laquo;");
		htmlSigns.put(0xac,"&not;");
		htmlSigns.put(0xad,"&shy;");
		htmlSigns.put(0xae,"&reg;");
		htmlSigns.put(0xaf,"&macr;");
		htmlSigns.put(0xb0,"&deg;");
		htmlSigns.put(0xb1,"&plusmn;");
		htmlSigns.put(0xb2,"&sup2;");
		htmlSigns.put(0xb3,"&sup3;");
		htmlSigns.put(0xb4,"&acute;");
		htmlSigns.put(0xb5,"&micro;");
		htmlSigns.put(0xb6,"&para;");
		htmlSigns.put(0xb7,"&middot;");
		htmlSigns.put(0xb8,"&cedil;");
		htmlSigns.put(0xb9,"&sup1;");
		htmlSigns.put(0xba,"&ordm;");
		htmlSigns.put(0xbb,"&raquo;");
		htmlSigns.put(0xbc,"&frac14;");
		htmlSigns.put(0xbd,"&frac12;");
		htmlSigns.put(0xbe,"&frac34;");
		htmlSigns.put(0xbf,"&iquest;");

		// ISO 8859-1 (Latin-1) letters
		htmlSigns.put(0xc0,"&Agrave;");
		htmlSigns.put(0xc1,"&Aacute;");
		htmlSigns.put(0xc2,"&Acirc;");
		htmlSigns.put(0xc3,"&Atilde;");
		htmlSigns.put(0xc4,"&Auml;");
		htmlSigns.put(0xc5,"&Aring;");
		htmlSigns.put(0xc6,"&AElig;");
		htmlSigns.put(0xc7,"&Ccedil;");
		htmlSigns.put(0xc8,"&Egrave;");
		htmlSigns.put(0xc9,"&Eacute;");
		htmlSigns.put(0xca,"&Ecirc;");
		htmlSigns.put(0xcb,"&Euml;");
		htmlSigns.put(0xcc,"&Igrave;");
		htmlSigns.put(0xcd,"&Iacute;");
		htmlSigns.put(0xce,"&Icirc;");
		htmlSigns.put(0xcf,"&Iuml;");
		htmlSigns.put(0xd0,"&ETH;");
		htmlSigns.put(0xd1,"&Ntilde;");
		htmlSigns.put(0xd2,"&Ograve;");
		htmlSigns.put(0xd3,"&Oacute;");
		htmlSigns.put(0xd4,"&Ocirc;");
		htmlSigns.put(0xd5,"&Otilde;");
		htmlSigns.put(0xd6,"&Ouml;");
		htmlSigns.put(0xd7,"&times;");
		htmlSigns.put(0xd8,"&Oslash;");
		htmlSigns.put(0xd9,"&Ugrave;");
		htmlSigns.put(0xda,"&Uacute;");
		htmlSigns.put(0xdb,"&Ucirc;");
		htmlSigns.put(0xdc,"&Uuml;");
		htmlSigns.put(0xdd,"&Yacute;");
		htmlSigns.put(0xde,"&THORN;");
		htmlSigns.put(0xdf,"&szlig;");
		htmlSigns.put(0xe0,"&agrave;");
		htmlSigns.put(0xe1,"&aacute;");
		htmlSigns.put(0xe2,"&acirc;");
		htmlSigns.put(0xe3,"&atilde;");
		htmlSigns.put(0xe4,"&auml;");
		htmlSigns.put(0xe5,"&aring;");
		htmlSigns.put(0xe6,"&aelig;");
		htmlSigns.put(0xe7,"&ccedil;");
		htmlSigns.put(0xe8,"&egrave;");
		htmlSigns.put(0xe9,"&eacute;");
		htmlSigns.put(0xea,"&ecirc;");
		htmlSigns.put(0xeb,"&euml;");
		htmlSigns.put(0xec,"&igrave;");
		htmlSigns.put(0xed,"&iacute;");
		htmlSigns.put(0xee,"&icirc;");
		htmlSigns.put(0xef,"&iuml;");
		htmlSigns.put(0xf0,"&eth;");
		htmlSigns.put(0xf1,"&ntilde;");
		htmlSigns.put(0xf2,"&ograve;");
		htmlSigns.put(0xf3,"&oacute;");
		htmlSigns.put(0xf4,"&ocirc;");
		htmlSigns.put(0xf5,"&otilde;");
		htmlSigns.put(0xf6,"&ouml;");
		htmlSigns.put(0xf7,"&divide;");
		htmlSigns.put(0xf8,"&oslash;");
		htmlSigns.put(0xf9,"&ugrave;");
		htmlSigns.put(0xfa,"&uacute;");
		htmlSigns.put(0xfb,"&ucirc;");
		htmlSigns.put(0xfc,"&uuml;");
		htmlSigns.put(0xfd,"&yacute;");
		htmlSigns.put(0xfe,"&thorn;");
		htmlSigns.put(0xff,"&yuml;");

		// Latin Extended and spacing modifier letters
		htmlSigns.put(0x152,"&OElig;");
		htmlSigns.put(0x153,"&oelig;");
		htmlSigns.put(0x160,"&Scaron;");
		htmlSigns.put(0x161,"&scaron;");
		htmlSigns.put(0x178,"&Yuml;");
		htmlSigns.put(0x192,"&fnof;");
		htmlSigns.put(0x2c6,"&circ;");
		htmlSigns.put(0x2dc,"&tilde;");

		// General punctuation, spaces and directional marks
		htmlSigns.put(0x2002,"&ensp;");
		htmlSigns.put(0x2003,"&emsp;");
		htmlSigns.put(0x2009,"&thinsp;");
		htmlSigns.put(0x200c,"&zwnj;");
		htmlSigns.put(0x200d,"&zwj;");
		htmlSigns.put(0x200e,"&lrm;");
		htmlSigns.put(0x200f,"&rlm;");
		htmlSigns.put(0x2013,"&ndash;");
		htmlSigns.put(0x2014,"&mdash;");
		htmlSigns.put(0x2018,"&lsquo;");
		htmlSigns.put(0x2019,"&rsquo;");
		htmlSigns.put(0x201a,"&sbquo;");
		htmlSigns.put(0x201c,"&ldquo;");
		htmlSigns.put(0x201d,"&rdquo;");
		htmlSigns.put(0x201e,"&bdquo;");
		htmlSigns.put(0x2020,"&dagger;");
		htmlSigns.put(0x2021,"&Dagger;");
		htmlSigns.put(0x2030,"&permil;");
		htmlSigns.put(0x2039,"&lsaquo;");
		htmlSigns.put(0x203a,"&rsaquo;");
		htmlSigns.put(0x20ac,"&euro;");

		// Greek letters
		htmlSigns.put(0x391,"&Alpha;");
		htmlSigns.put(0x392,"&Beta;");
		htmlSigns.put(0x393,"&Gamma;");
		htmlSigns.put(0x394,"&Delta;");
		htmlSigns.put(0x395,"&Epsilon;");
		htmlSigns.put(0x396,"&Zeta;");
		htmlSigns.put(0x397,"&Eta;");
		htmlSigns.put(0x398,"&Theta;");
		htmlSigns.put(0x399,"&Iota;");
		htmlSigns.put(0x39a,"&Kappa;");
		htmlSigns.put(0x39b,"&Lambda;");
		htmlSigns.put(0x39c,"&Mu;");
		htmlSigns.put(0x39d,"&Nu;");
		htmlSigns.put(0x39e,"&Xi;");
		htmlSigns.put(0x39f,"&Omicron;");
		htmlSigns.put(0x3a0,"&Pi;");
		htmlSigns.put(0x3a1,"&Rho;");
		htmlSigns.put(0x3a3,"&Sigma;");
		htmlSigns.put(0x3a4,"&Tau;");
		htmlSigns.put(0x3a5,"&Upsilon;");
		htmlSigns.put(0x3a6,"&Phi;");
		htmlSigns.put(0x3a7,"&Chi;");
		htmlSigns.put(0x3a8,"&Psi;");
		htmlSigns.put(0x3a9,"&Omega;");
		htmlSigns.put(0x3b1,"&alpha;");
		htmlSigns.put(0x3b2,"&beta;");
		htmlSigns.put(0x3b3,"&gamma;");
		htmlSigns.put(0x3b4,"&delta;");
		htmlSigns.put(0x3b5,"&epsilon;");
		htmlSigns.put(0x3b6,"&zeta;");
		htmlSigns.put(0x3b7,"&eta;");
		htmlSigns.put(0x3b8,"&theta;");
		htmlSigns.put(0x3b9,"&iota;");
		htmlSigns.put(0x3ba,"&kappa;");
		htmlSigns.put(0x3bb,"&lambda;");
		htmlSigns.put(0x3bc,"&mu;");
		htmlSigns.put(0x3bd,"&nu;");
		htmlSigns.put(0x3be,"&xi;");
		htmlSigns.put(0x3bf,"&omicron;");
		htmlSigns.put(0x3c0,"&pi;");
		htmlSigns.put(0x3c1,"&rho;");
		htmlSigns.put(0x3c2,"&sigmaf;");
		htmlSigns.put(0x3c3,"&sigma;");
		htmlSigns.put(0x3c4,"&tau;");
		htmlSigns.put(0x3c5,"&upsilon;");
		htmlSigns.put(0x3c6,"&phi;");
		htmlSigns.put(0x3c7,"&chi;");
		htmlSigns.put(0x3c8,"&psi;");
		htmlSigns.put(0x3c9,"&omega;");
		htmlSigns.put(0x3d1,"&thetasym;");
		htmlSigns.put(0x3d2,"&upsih;");
		htmlSigns.put(0x3d6,"&piv;");

		// Further punctuation and letter-like symbols
		htmlSigns.put(0x2022,"&bull;");
		htmlSigns.put(0x2026,"&hellip;");
		htmlSigns.put(0x2032,"&prime;");
		htmlSigns.put(0x2033,"&Prime;");
		htmlSigns.put(0x203e,"&oline;");
		htmlSigns.put(0x2044,"&frasl;");
		htmlSigns.put(0x2118,"&weierp;");
		htmlSigns.put(0x2111,"&image;");
		htmlSigns.put(0x211c,"&real;");
		htmlSigns.put(0x2122,"&trade;");
		htmlSigns.put(0x2135,"&alefsym;");

		// Arrows
		htmlSigns.put(0x2190,"&larr;");
		htmlSigns.put(0x2191,"&uarr;");
		htmlSigns.put(0x2192,"&rarr;");
		htmlSigns.put(0x2193,"&darr;");
		htmlSigns.put(0x2194,"&harr;");
		htmlSigns.put(0x21b5,"&crarr;");
		htmlSigns.put(0x21d0,"&lArr;");
		htmlSigns.put(0x21d1,"&uArr;");
		htmlSigns.put(0x21d2,"&rArr;");
		htmlSigns.put(0x21d3,"&dArr;");
		htmlSigns.put(0x21d4,"&hArr;");

		// Mathematical operators
		htmlSigns.put(0x2200,"&forall;");
		htmlSigns.put(0x2202,"&part;");
		htmlSigns.put(0x2203,"&exist;");
		htmlSigns.put(0x2205,"&empty;");
		htmlSigns.put(0x2207,"&nabla;");
		htmlSigns.put(0x2208,"&isin;");
		htmlSigns.put(0x2209,"&notin;");
		htmlSigns.put(0x220b,"&ni;");
		htmlSigns.put(0x220f,"&prod;");
		htmlSigns.put(0x2211,"&sum;");
		htmlSigns.put(0x2212,"&minus;");
		htmlSigns.put(0x2217,"&lowast;");
		htmlSigns.put(0x221a,"&radic;");
		htmlSigns.put(0x221d,"&prop;");
		htmlSigns.put(0x221e,"&infin;");
		htmlSigns.put(0x2220,"&ang;");
		htmlSigns.put(0x2227,"&and;");
		htmlSigns.put(0x2228,"&or;");
		htmlSigns.put(0x2229,"&cap;");
		htmlSigns.put(0x222a,"&cup;");
		htmlSigns.put(0x222b,"&int;");
		htmlSigns.put(0x2234,"&there4;");
		htmlSigns.put(0x223c,"&sim;");
		htmlSigns.put(0x2245,"&cong;");
		htmlSigns.put(0x2248,"&asymp;");
		htmlSigns.put(0x2260,"&ne;");
		htmlSigns.put(0x2261,"&equiv;");
		htmlSigns.put(0x2264,"&le;");
		htmlSigns.put(0x2265,"&ge;");
		htmlSigns.put(0x2282,"&sub;");
		htmlSigns.put(0x2283,"&sup;");
		htmlSigns.put(0x2284,"&nsub;");
		htmlSigns.put(0x2286,"&sube;");
		htmlSigns.put(0x2287,"&supe;");
		htmlSigns.put(0x2295,"&oplus;");
		htmlSigns.put(0x2297,"&otimes;");
		htmlSigns.put(0x22a5,"&perp;");
		htmlSigns.put(0x22c5,"&sdot;");

		// Technical symbols, geometric shapes and card suits
		htmlSigns.put(0x2308,"&lceil;");
		htmlSigns.put(0x2309,"&rceil;");
		htmlSigns.put(0x230a,"&lfloor;");
		htmlSigns.put(0x230b,"&rfloor;");
		htmlSigns.put(0x2329,"&lang;");
		htmlSigns.put(0x232a,"&rang;");
		htmlSigns.put(0x25ca,"&loz;");
		htmlSigns.put(0x2660,"&spades;");
		htmlSigns.put(0x2663,"&clubs;");
		htmlSigns.put(0x2665,"&hearts;");
		htmlSigns.put(0x2666,"&diams;");
	}

	/**
	 * Converts characters to HTML character references where necessary.
	 * <p>
	 * Characters that have an entry in the entity table are replaced by their
	 * named reference, e.g. {@code <} becomes {@code &lt;}. Any other
	 * character above code point 126 is written as a decimal numeric
	 * reference ({@code &#NNN;}). Everything else is copied unchanged.
	 * </p>
	 * <p>
	 * The input is processed per Unicode code point, so a character outside
	 * the Basic Multilingual Plane yields a single numeric reference rather
	 * than one per surrogate.
	 * </p>
	 *
	 * @param s
	 *            a <code>String</code>, must not be <code>null</code>
	 *
	 * @return a html displayable string
	 *
	 * @throws NullPointerException
	 *             if <code>s</code> is <code>null</code>
	 */
	public static String toHTML(String s)
	{
		final int length = s.length();
		StringBuilder sb = new StringBuilder(length);
		for(int i = 0; i < length;)
		{
			final int codePoint = s.codePointAt(i);
			i += Character.charCount(codePoint);

			String htmlSpecialSign = htmlSigns.get(codePoint);
			if(htmlSpecialSign != null)
			{
				sb.append(htmlSpecialSign);
			}
			else if(codePoint > 126)
			{
				// A numeric character reference needs the '#' marker;
				// "&1234;" would be read as an (unknown) named entity.
				sb.append("&#");
				sb.append(codePoint);
				sb.append(';');
			}
			else
			{
				sb.append((char)codePoint);
			}
		}
		return sb.toString();
	}

	/**
	 * Converts a {@link Font} to an inline html style declaration.
	 *
	 * <p>
	 * Example:
	 * </p>
	 *
	 * <pre>
	 * Font font = new Font("Times New Roman", Font.ITALIC, 12);
	 * returns
	 * "font-family:'Times New Roman'; font-style:italic; font-size:12pt;"
	 * </pre>
	 *
	 * <p>
	 * This method is an alias for
	 * <code>HTMLUtils.toStyle(f.getFamily(),f.getStyle(),f.getSize());</code>
	 * </p>
	 *
	 * @param f
	 *            the {@link Font} of this style
	 *
	 * @return a html displayable string
	 *
	 * @see #toStyle(String, int, int)
	 */
	public static String toStyle(Font f)
	{
		return toStyle(f.getFamily(),f.getStyle(),f.getSize());
	}

	/**
	 * Builds an inline html style declaration from font properties.
	 *
	 * <p>
	 * Example:
	 * </p>
	 *
	 * <pre>
	 * toStyle("Times New Roman", Font.ITALIC, 12)
	 * returns
	 * "font-family:'Times New Roman'; font-style:italic; font-size:12pt;"
	 * </pre>
	 *
	 * @param fontFamily
	 *            the font family name as <code>String</code>; it is inserted
	 *            between single quotes without any escaping
	 *
	 * @param style
	 *            the font style as bit mask of {@link Font#ITALIC} and
	 *            {@link Font#BOLD}
	 * @param size
	 *            the font size in points
	 *
	 * @return a html displayable string
	 *
	 * @see #toStyle(Font)
	 */
	public static String toStyle(String fontFamily, int style, int size)
	{
		StringBuilder sb = new StringBuilder();
		sb.append("font-family:'");
		sb.append(fontFamily);
		sb.append("';");
		if((style & Font.ITALIC) != 0)
		{
			sb.append(" font-style:italic;");
		}
		if((style & Font.BOLD) != 0)
		{
			sb.append(" font-weight:bold;");
		}
		sb.append(" font-size:");
		sb.append(size);
		sb.append("pt;");
		return sb.toString();
	}

	/**
	 * Shared converter, created on first use and reset before every further
	 * use. Only accessed from the synchronized {@link #htmlToText(String)}.
	 */
	private static HTML2Text html2text;

	/**
	 * Reduces HTML markup to plain text.
	 * <p>
	 * Text content is copied as delivered by the parser. Paragraphs, line
	 * breaks, lists and definition lists are rendered with line feeds, one
	 * space of indentation per nesting level, numbers for ordered list items
	 * and <code>*</code> for other list items.
	 * </p>
	 * <p>
	 * The conversion is best effort: if parsing fails, the text collected up
	 * to that point is returned and no exception is thrown.
	 * </p>
	 *
	 * @param html
	 *            the HTML source, must not be <code>null</code>
	 *
	 * @return the plain text representation of <code>html</code>
	 *
	 * @throws NullPointerException
	 *             if <code>html</code> is <code>null</code>
	 */
	public static synchronized String htmlToText(String html)
	{
		if(html2text == null)
		{
			html2text = new HTML2Text();
		}
		else
		{
			html2text.reset();
		}

		final StringReader in = new StringReader(html);
		try
		{
			html2text.parse(in);
		}
		catch(Exception ignored)
		{
			// Deliberately ignored: callers get whatever text could be
			// extracted before the parser gave up.
		}
		finally
		{
			// StringReader.close() does not throw
			in.close();
		}
		return html2text.getText();
	}



	/**
	 * Parser callback that collects the text of a document and translates a
	 * few block level tags into line feeds, indentation and list markers.
	 */
	private static class HTML2Text extends HTMLEditorKit.ParserCallback
	{
		/** The plain text produced so far. */
		private final StringBuilder		text		= new StringBuilder();
		/** One entry per open list, list item or definition description. */
		private final Stack<IndexType>	indentStack	= new Stack<IndexType>();



		/**
		 * An open indenting element together with the number of list items
		 * seen in it so far (only used for ordered lists).
		 */
		static class IndexType
		{
			final HTML.Tag	tag;
			int				counter;


			IndexType(HTML.Tag tag)
			{
				this.tag = tag;
				counter = 0;
			}
		}


		HTML2Text()
		{
		}


		/**
		 * Discards all collected text and nesting state.
		 */
		void reset()
		{
			text.setLength(0);
			indentStack.clear();
		}


		/**
		 * Parses the document read from <code>in</code>, feeding this
		 * callback.
		 */
		void parse(Reader in) throws IOException
		{
			ParserDelegator delegator = new ParserDelegator();
			// the third parameter is true to ignore charset directive
			delegator.parse(in,this,true);
		}


		@Override
		public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
		{
			if(t == HTML.Tag.P)
			{
				// extra line feed unless the text already ends with one
				if(text.length() > 0 && text.charAt(text.length() - 1) != '\n')
				{
					newLine();
				}
				newLine();
			}
			else if(t == HTML.Tag.OL || t == HTML.Tag.UL)
			{
				indentStack.push(new IndexType(t));
				newLine();
			}
			else if(t == HTML.Tag.LI)
			{
				// An item without an enclosing list is rendered as a bullet
				// instead of failing on the empty stack.
				IndexType parent = indentStack.isEmpty() ? null : indentStack.peek();
				if(parent != null && parent.tag == HTML.Tag.OL)
				{
					String numberString = (++parent.counter) + ".";
					text.append(numberString);
					// pad the number to a width of four characters
					for(int i = numberString.length(); i < 4; i++)
					{
						text.append(" ");
					}
				}
				else
				{
					text.append("* ");
				}
				indentStack.push(new IndexType(t));
			}
			else if(t == HTML.Tag.DL || t == HTML.Tag.DT)
			{
				newLine();
			}
			else if(t == HTML.Tag.DD)
			{
				indentStack.push(new IndexType(t));
				newLine();
			}
		}


		/**
		 * Appends a line feed followed by one space per open indenting
		 * element.
		 */
		void newLine()
		{
			text.append("\n");
			for(int i = 0; i < indentStack.size(); i++)
			{
				text.append(" ");
			}
		}


		/**
		 * Leaves the innermost indenting element; does nothing if there is
		 * none, so unbalanced end tags cannot abort the conversion.
		 */
		private void popIndent()
		{
			if(!indentStack.isEmpty())
			{
				indentStack.pop();
			}
		}


		@Override
		public void handleEndTag(HTML.Tag t, int pos)
		{
			if(t == HTML.Tag.P)
			{
				newLine();
			}
			else if(t == HTML.Tag.OL || t == HTML.Tag.UL || t == HTML.Tag.LI)
			{
				popIndent();
				newLine();
			}
			else if(t == HTML.Tag.DD)
			{
				popIndent();
			}
		}


		@Override
		public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
		{
			if(t == HTML.Tag.BR)
			{
				newLine();
			}
		}


		@Override
		public void handleText(char[] text, int pos)
		{
			this.text.append(text);
		}


		/**
		 * @return the plain text collected so far
		 */
		String getText()
		{
			return text.toString();
		}
	}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy