All Downloads are FREE. Search and download functionalities are using the official Maven repository.

xdev.util.HTMLUtils Maven / Gradle / Ivy

/*
 * XDEV Application Framework - XDEV Application Framework
 * Copyright © 2003 XDEV Software (https://xdev.software)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package xdev.util;


import java.awt.Font;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

import xdev.io.IOUtils;


/**
 * The <code>HTMLUtils</code> class provides utility methods for HTML handling:
 * escaping text for display in HTML ({@link #toHTML(String)}), turning a
 * {@link Font} into a CSS style declaration ({@link #toStyle(Font)}) and
 * reducing HTML markup to plain text ({@link #htmlToText(String)}).
 * 
 * @since 2.0
 * 
 * @author XDEV Software
 */
public final class HTMLUtils
{
	private HTMLUtils()
	{
	}
	
	/**
	 * Maps a Unicode code point to the named HTML 4 character entity reference
	 * (including the leading <code>&amp;</code> and the trailing
	 * <code>;</code>) that represents it.
	 */
	private final static Map<Integer, String>	htmlSigns;
	static
	{
		htmlSigns = new HashMap<Integer, String>();
		
		// markup-significant characters
		htmlSigns.put(0x22,"&quot;");
		htmlSigns.put(0x26,"&amp;");
		htmlSigns.put(0x3c,"&lt;");
		htmlSigns.put(0x3e,"&gt;");
		
		// ISO 8859-1 (Latin-1) symbols
		htmlSigns.put(0xa0,"&nbsp;");
		htmlSigns.put(0xa1,"&iexcl;");
		htmlSigns.put(0xa2,"&cent;");
		htmlSigns.put(0xa3,"&pound;");
		htmlSigns.put(0xa4,"&curren;");
		htmlSigns.put(0xa5,"&yen;");
		htmlSigns.put(0xa6,"&brvbar;");
		htmlSigns.put(0xa7,"&sect;");
		htmlSigns.put(0xa8,"&uml;");
		htmlSigns.put(0xa9,"&copy;");
		htmlSigns.put(0xaa,"&ordf;");
		htmlSigns.put(0xab,"&laquo;");
		htmlSigns.put(0xac,"&not;");
		htmlSigns.put(0xad,"&shy;");
		htmlSigns.put(0xae,"&reg;");
		htmlSigns.put(0xaf,"&macr;");
		htmlSigns.put(0xb0,"&deg;");
		htmlSigns.put(0xb1,"&plusmn;");
		htmlSigns.put(0xb2,"&sup2;");
		htmlSigns.put(0xb3,"&sup3;");
		htmlSigns.put(0xb4,"&acute;");
		htmlSigns.put(0xb5,"&micro;");
		htmlSigns.put(0xb6,"&para;");
		htmlSigns.put(0xb7,"&middot;");
		htmlSigns.put(0xb8,"&cedil;");
		htmlSigns.put(0xb9,"&sup1;");
		htmlSigns.put(0xba,"&ordm;");
		htmlSigns.put(0xbb,"&raquo;");
		htmlSigns.put(0xbc,"&frac14;");
		htmlSigns.put(0xbd,"&frac12;");
		htmlSigns.put(0xbe,"&frac34;");
		htmlSigns.put(0xbf,"&iquest;");
		
		// ISO 8859-1 (Latin-1) letters
		htmlSigns.put(0xc0,"&Agrave;");
		htmlSigns.put(0xc1,"&Aacute;");
		htmlSigns.put(0xc2,"&Acirc;");
		htmlSigns.put(0xc3,"&Atilde;");
		htmlSigns.put(0xc4,"&Auml;");
		htmlSigns.put(0xc5,"&Aring;");
		htmlSigns.put(0xc6,"&AElig;");
		htmlSigns.put(0xc7,"&Ccedil;");
		htmlSigns.put(0xc8,"&Egrave;");
		htmlSigns.put(0xc9,"&Eacute;");
		htmlSigns.put(0xca,"&Ecirc;");
		htmlSigns.put(0xcb,"&Euml;");
		htmlSigns.put(0xcc,"&Igrave;");
		htmlSigns.put(0xcd,"&Iacute;");
		htmlSigns.put(0xce,"&Icirc;");
		htmlSigns.put(0xcf,"&Iuml;");
		htmlSigns.put(0xd0,"&ETH;");
		htmlSigns.put(0xd1,"&Ntilde;");
		htmlSigns.put(0xd2,"&Ograve;");
		htmlSigns.put(0xd3,"&Oacute;");
		htmlSigns.put(0xd4,"&Ocirc;");
		htmlSigns.put(0xd5,"&Otilde;");
		htmlSigns.put(0xd6,"&Ouml;");
		htmlSigns.put(0xd7,"&times;");
		htmlSigns.put(0xd8,"&Oslash;");
		htmlSigns.put(0xd9,"&Ugrave;");
		htmlSigns.put(0xda,"&Uacute;");
		htmlSigns.put(0xdb,"&Ucirc;");
		htmlSigns.put(0xdc,"&Uuml;");
		htmlSigns.put(0xdd,"&Yacute;");
		htmlSigns.put(0xde,"&THORN;");
		htmlSigns.put(0xdf,"&szlig;");
		htmlSigns.put(0xe0,"&agrave;");
		htmlSigns.put(0xe1,"&aacute;");
		htmlSigns.put(0xe2,"&acirc;");
		htmlSigns.put(0xe3,"&atilde;");
		htmlSigns.put(0xe4,"&auml;");
		htmlSigns.put(0xe5,"&aring;");
		htmlSigns.put(0xe6,"&aelig;");
		htmlSigns.put(0xe7,"&ccedil;");
		htmlSigns.put(0xe8,"&egrave;");
		htmlSigns.put(0xe9,"&eacute;");
		htmlSigns.put(0xea,"&ecirc;");
		htmlSigns.put(0xeb,"&euml;");
		htmlSigns.put(0xec,"&igrave;");
		htmlSigns.put(0xed,"&iacute;");
		htmlSigns.put(0xee,"&icirc;");
		htmlSigns.put(0xef,"&iuml;");
		htmlSigns.put(0xf0,"&eth;");
		htmlSigns.put(0xf1,"&ntilde;");
		htmlSigns.put(0xf2,"&ograve;");
		htmlSigns.put(0xf3,"&oacute;");
		htmlSigns.put(0xf4,"&ocirc;");
		htmlSigns.put(0xf5,"&otilde;");
		htmlSigns.put(0xf6,"&ouml;");
		htmlSigns.put(0xf7,"&divide;");
		htmlSigns.put(0xf8,"&oslash;");
		htmlSigns.put(0xf9,"&ugrave;");
		htmlSigns.put(0xfa,"&uacute;");
		htmlSigns.put(0xfb,"&ucirc;");
		htmlSigns.put(0xfc,"&uuml;");
		htmlSigns.put(0xfd,"&yacute;");
		htmlSigns.put(0xfe,"&thorn;");
		htmlSigns.put(0xff,"&yuml;");
		
		// Latin Extended, spacing modifiers, general punctuation
		htmlSigns.put(0x152,"&OElig;");
		htmlSigns.put(0x153,"&oelig;");
		htmlSigns.put(0x160,"&Scaron;");
		htmlSigns.put(0x161,"&scaron;");
		htmlSigns.put(0x178,"&Yuml;");
		htmlSigns.put(0x192,"&fnof;");
		htmlSigns.put(0x2c6,"&circ;");
		htmlSigns.put(0x2dc,"&tilde;");
		htmlSigns.put(0x2002,"&ensp;");
		htmlSigns.put(0x2003,"&emsp;");
		htmlSigns.put(0x2009,"&thinsp;");
		htmlSigns.put(0x200c,"&zwnj;");
		htmlSigns.put(0x200d,"&zwj;");
		htmlSigns.put(0x200e,"&lrm;");
		htmlSigns.put(0x200f,"&rlm;");
		htmlSigns.put(0x2013,"&ndash;");
		htmlSigns.put(0x2014,"&mdash;");
		htmlSigns.put(0x2018,"&lsquo;");
		htmlSigns.put(0x2019,"&rsquo;");
		htmlSigns.put(0x201a,"&sbquo;");
		htmlSigns.put(0x201c,"&ldquo;");
		htmlSigns.put(0x201d,"&rdquo;");
		htmlSigns.put(0x201e,"&bdquo;");
		htmlSigns.put(0x2020,"&dagger;");
		htmlSigns.put(0x2021,"&Dagger;");
		htmlSigns.put(0x2030,"&permil;");
		htmlSigns.put(0x2039,"&lsaquo;");
		htmlSigns.put(0x203a,"&rsaquo;");
		htmlSigns.put(0x20ac,"&euro;");
		
		// Greek
		htmlSigns.put(0x391,"&Alpha;");
		htmlSigns.put(0x392,"&Beta;");
		htmlSigns.put(0x393,"&Gamma;");
		htmlSigns.put(0x394,"&Delta;");
		htmlSigns.put(0x395,"&Epsilon;");
		htmlSigns.put(0x396,"&Zeta;");
		htmlSigns.put(0x397,"&Eta;");
		htmlSigns.put(0x398,"&Theta;");
		htmlSigns.put(0x399,"&Iota;");
		htmlSigns.put(0x39a,"&Kappa;");
		htmlSigns.put(0x39b,"&Lambda;");
		htmlSigns.put(0x39c,"&Mu;");
		htmlSigns.put(0x39d,"&Nu;");
		htmlSigns.put(0x39e,"&Xi;");
		htmlSigns.put(0x39f,"&Omicron;");
		htmlSigns.put(0x3a0,"&Pi;");
		htmlSigns.put(0x3a1,"&Rho;");
		htmlSigns.put(0x3a3,"&Sigma;");
		htmlSigns.put(0x3a4,"&Tau;");
		htmlSigns.put(0x3a5,"&Upsilon;");
		htmlSigns.put(0x3a6,"&Phi;");
		htmlSigns.put(0x3a7,"&Chi;");
		htmlSigns.put(0x3a8,"&Psi;");
		htmlSigns.put(0x3a9,"&Omega;");
		htmlSigns.put(0x3b1,"&alpha;");
		htmlSigns.put(0x3b2,"&beta;");
		htmlSigns.put(0x3b3,"&gamma;");
		htmlSigns.put(0x3b4,"&delta;");
		htmlSigns.put(0x3b5,"&epsilon;");
		htmlSigns.put(0x3b6,"&zeta;");
		htmlSigns.put(0x3b7,"&eta;");
		htmlSigns.put(0x3b8,"&theta;");
		htmlSigns.put(0x3b9,"&iota;");
		htmlSigns.put(0x3ba,"&kappa;");
		htmlSigns.put(0x3bb,"&lambda;");
		htmlSigns.put(0x3bc,"&mu;");
		htmlSigns.put(0x3bd,"&nu;");
		htmlSigns.put(0x3be,"&xi;");
		htmlSigns.put(0x3bf,"&omicron;");
		htmlSigns.put(0x3c0,"&pi;");
		htmlSigns.put(0x3c1,"&rho;");
		htmlSigns.put(0x3c2,"&sigmaf;");
		htmlSigns.put(0x3c3,"&sigma;");
		htmlSigns.put(0x3c4,"&tau;");
		htmlSigns.put(0x3c5,"&upsilon;");
		htmlSigns.put(0x3c6,"&phi;");
		htmlSigns.put(0x3c7,"&chi;");
		htmlSigns.put(0x3c8,"&psi;");
		htmlSigns.put(0x3c9,"&omega;");
		htmlSigns.put(0x3d1,"&thetasym;");
		htmlSigns.put(0x3d2,"&upsih;");
		htmlSigns.put(0x3d6,"&piv;");
		
		// punctuation, letter-like symbols, arrows
		htmlSigns.put(0x2022,"&bull;");
		htmlSigns.put(0x2026,"&hellip;");
		htmlSigns.put(0x2032,"&prime;");
		htmlSigns.put(0x2033,"&Prime;");
		htmlSigns.put(0x203e,"&oline;");
		htmlSigns.put(0x2044,"&frasl;");
		htmlSigns.put(0x2118,"&weierp;");
		htmlSigns.put(0x2111,"&image;");
		htmlSigns.put(0x211c,"&real;");
		htmlSigns.put(0x2122,"&trade;");
		htmlSigns.put(0x2135,"&alefsym;");
		htmlSigns.put(0x2190,"&larr;");
		htmlSigns.put(0x2191,"&uarr;");
		htmlSigns.put(0x2192,"&rarr;");
		htmlSigns.put(0x2193,"&darr;");
		htmlSigns.put(0x2194,"&harr;");
		htmlSigns.put(0x21b5,"&crarr;");
		htmlSigns.put(0x21d0,"&lArr;");
		htmlSigns.put(0x21d1,"&uArr;");
		htmlSigns.put(0x21d2,"&rArr;");
		htmlSigns.put(0x21d3,"&dArr;");
		htmlSigns.put(0x21d4,"&hArr;");
		
		// mathematical operators
		htmlSigns.put(0x2200,"&forall;");
		htmlSigns.put(0x2202,"&part;");
		htmlSigns.put(0x2203,"&exist;");
		htmlSigns.put(0x2205,"&empty;");
		htmlSigns.put(0x2207,"&nabla;");
		htmlSigns.put(0x2208,"&isin;");
		htmlSigns.put(0x2209,"&notin;");
		htmlSigns.put(0x220b,"&ni;");
		htmlSigns.put(0x220f,"&prod;");
		htmlSigns.put(0x2211,"&sum;");
		htmlSigns.put(0x2212,"&minus;");
		htmlSigns.put(0x2217,"&lowast;");
		htmlSigns.put(0x221a,"&radic;");
		htmlSigns.put(0x221d,"&prop;");
		htmlSigns.put(0x221e,"&infin;");
		htmlSigns.put(0x2220,"&ang;");
		htmlSigns.put(0x2227,"&and;");
		htmlSigns.put(0x2228,"&or;");
		htmlSigns.put(0x2229,"&cap;");
		htmlSigns.put(0x222a,"&cup;");
		htmlSigns.put(0x222b,"&int;");
		htmlSigns.put(0x2234,"&there4;");
		htmlSigns.put(0x223c,"&sim;");
		htmlSigns.put(0x2245,"&cong;");
		htmlSigns.put(0x2248,"&asymp;");
		htmlSigns.put(0x2260,"&ne;");
		htmlSigns.put(0x2261,"&equiv;");
		htmlSigns.put(0x2264,"&le;");
		htmlSigns.put(0x2265,"&ge;");
		htmlSigns.put(0x2282,"&sub;");
		htmlSigns.put(0x2283,"&sup;");
		htmlSigns.put(0x2284,"&nsub;");
		htmlSigns.put(0x2286,"&sube;");
		htmlSigns.put(0x2287,"&supe;");
		htmlSigns.put(0x2295,"&oplus;");
		htmlSigns.put(0x2297,"&otimes;");
		htmlSigns.put(0x22a5,"&perp;");
		htmlSigns.put(0x22c5,"&sdot;");
		
		// technical and geometric symbols, card suits
		htmlSigns.put(0x2308,"&lceil;");
		htmlSigns.put(0x2309,"&rceil;");
		htmlSigns.put(0x230a,"&lfloor;");
		htmlSigns.put(0x230b,"&rfloor;");
		htmlSigns.put(0x2329,"&lang;");
		htmlSigns.put(0x232a,"&rang;");
		htmlSigns.put(0x25ca,"&loz;");
		htmlSigns.put(0x2660,"&spades;");
		htmlSigns.put(0x2663,"&clubs;");
		htmlSigns.put(0x2665,"&hearts;");
		htmlSigns.put(0x2666,"&diams;");
	}
	

	/**
	 * Converts chars to html character instructions if necessary.
	 * <p>
	 * e.g: {@code <} to {@code &lt;}
	 * </p>
	 * <p>
	 * Characters that have a named entity in the internal table are replaced
	 * by that entity. Every other character with a code point above 126 is
	 * written as a decimal numeric character reference (e.g.
	 * {@code &#256;}). All remaining characters are copied unchanged. The
	 * string is processed per Unicode code point, so a surrogate pair yields
	 * a single reference.
	 * </p>
	 * 
	 * @param s
	 *            a <code>String</code>, must not be <code>null</code>
	 * 
	 * @return a html displayable string
	 */
	public static String toHTML(String s)
	{
		StringBuilder sb = new StringBuilder(s.length());
		
		for(int i = 0; i < s.length();)
		{
			// step by code point so that supplementary characters are not
			// split into two (invalid) surrogate references
			int codePoint = s.codePointAt(i);
			i += Character.charCount(codePoint);
			
			String htmlSpecialSign = htmlSigns.get(codePoint);
			if(htmlSpecialSign != null)
			{
				sb.append(htmlSpecialSign);
			}
			else if(codePoint > 126)
			{
				// a numeric character reference needs the '#' marker,
				// "&256;" would be rendered literally by a browser
				sb.append("&#");
				sb.append(codePoint);
				sb.append(';');
			}
			else
			{
				sb.appendCodePoint(codePoint);
			}
		}
		
		return sb.toString();
	}
	

	/**
	 * Converts {@link Font} to html stylesheet.
	 * <p>
	 * Example:
	 * </p>
	 * 
	 * <pre>
	 * Font font = new Font("Times New Roman", Font.ITALIC, 12);
	 * </pre>
	 * 
	 * returns
	 * <code>"font-family:'Times New Roman'; font-style:italic; font-size:12pt;"</code>
	 * <p>
	 * This method is an alias for
	 * <code>HTMLUtils.toStyle(f.getFamily(),f.getStyle(),f.getSize());</code>
	 * </p>
	 * 
	 * @param f
	 *            the {@link Font} of this style
	 * 
	 * @return a html displayable string
	 * 
	 * @see #toStyle(String, int, int)
	 */
	public static String toStyle(Font f)
	{
		return toStyle(f.getFamily(),f.getStyle(),f.getSize());
	}
	

	/**
	 * Converts the given font attributes to html stylesheet.
	 * <p>
	 * Example: <code>toStyle("Times New Roman", Font.ITALIC, 12)</code>
	 * returns
	 * <code>"font-family:'Times New Roman'; font-style:italic; font-size:12pt;"</code>
	 * </p>
	 * 
	 * @param fontFamily
	 *            the font family name as <code>String</code>
	 * 
	 * @param style
	 *            the font style as int, a bit mask of {@link Font#ITALIC} and
	 *            {@link Font#BOLD}
	 * @param size
	 *            the font size as int, written as points
	 * 
	 * @return a html displayable string
	 * 
	 * @see #toStyle(Font)
	 */
	public static String toStyle(String fontFamily, int style, int size)
	{
		StringBuilder sb = new StringBuilder();
		
		sb.append("font-family:'");
		sb.append(fontFamily);
		sb.append("';");
		
		if((style & Font.ITALIC) != 0)
		{
			sb.append(" font-style:italic;");
		}
		
		if((style & Font.BOLD) != 0)
		{
			sb.append(" font-weight:bold;");
		}
		
		sb.append(" font-size:");
		sb.append(size);
		sb.append("pt;");
		
		return sb.toString();
	}
	
	/**
	 * Shared, lazily created converter. It is only touched from the
	 * synchronized {@link #htmlToText(String)} method.
	 */
	private static HTML2Text	html2text;
	

	/**
	 * Reduces HTML markup to plain text.
	 * <p>
	 * The markup is parsed with the Swing HTML parser. Text content is
	 * copied, while paragraphs, line breaks, ordered/unordered lists and
	 * definition lists are rendered with line breaks, indentation, numbers
	 * and bullets.
	 * </p>
	 * <p>
	 * The conversion is best effort: if parsing fails, the text collected up
	 * to that point is returned and no exception is thrown.
	 * </p>
	 * 
	 * @param html
	 *            the HTML markup, must not be <code>null</code>
	 * 
	 * @return the plain text representation of <code>html</code>
	 */
	public static synchronized String htmlToText(String html)
	{
		if(html2text == null)
		{
			html2text = new HTML2Text();
		}
		else
		{
			html2text.reset();
		}
		
		StringReader in = new StringReader(html);
		try
		{
			html2text.parse(in);
		}
		catch(Exception ignored)
		{
			// deliberate best effort: whatever has been collected before the
			// parser gave up is still returned to the caller
		}
		finally
		{
			// StringReader.close() cannot fail
			in.close();
		}
		
		return html2text.getText();
	}
	
	

	/**
	 * Parser callback which collects the text of a HTML document and renders
	 * block and list elements with line breaks and indentation.
	 */
	private static class HTML2Text extends HTMLEditorKit.ParserCallback
	{
		/** the plain text collected so far */
		final StringBuilder		stringBuffer;
		/** one entry per currently open list, list item or dd element */
		final Stack<IndexType>	indentStack;
		
		
		/**
		 * An open, indenting element together with the running item number
		 * used for ordered lists.
		 */
		static class IndexType
		{
			final String	type;
			int				counter;
			
			
			IndexType(String type)
			{
				this.type = type;
				counter = 0;
			}
		}
		
		
		HTML2Text()
		{
			stringBuffer = new StringBuilder();
			indentStack = new Stack<IndexType>();
		}
		
		
		/**
		 * Discards all collected state so the instance can be reused.
		 */
		void reset()
		{
			stringBuffer.setLength(0);
			indentStack.clear();
		}
		
		
		/**
		 * Parses the markup supplied by <code>in</code>, feeding this callback.
		 */
		void parse(Reader in) throws IOException
		{
			ParserDelegator delegator = new ParserDelegator();
			// the third parameter is TRUE to ignore charset directive
			delegator.parse(in,this,Boolean.TRUE);
		}
		
		
		@Override
		public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
		{
			String tag = t.toString();
			
			if(tag.equals("p"))
			{
				// separate the paragraph from preceding text by an empty line
				int length = stringBuffer.length();
				if(length > 0 && stringBuffer.charAt(length - 1) != '\n')
				{
					newLine();
				}
				newLine();
			}
			else if(tag.equals("ol"))
			{
				indentStack.push(new IndexType("ol"));
				newLine();
			}
			else if(tag.equals("ul"))
			{
				indentStack.push(new IndexType("ul"));
				newLine();
			}
			else if(tag.equals("li"))
			{
				// a stray <li> without an enclosing list is rendered as a
				// bullet instead of aborting the whole conversion
				IndexType parent = indentStack.isEmpty() ? null : indentStack.peek();
				if(parent != null && parent.type.equals("ol"))
				{
					String numberString = "" + (++parent.counter) + ".";
					stringBuffer.append(numberString);
					// pad the number to a width of four characters
					for(int i = 0; i < (4 - numberString.length()); i++)
					{
						stringBuffer.append(" ");
					}
				}
				else
				{
					stringBuffer.append("* ");
				}
				indentStack.push(new IndexType("li"));
			}
			else if(tag.equals("dl"))
			{
				newLine();
			}
			else if(tag.equals("dt"))
			{
				newLine();
			}
			else if(tag.equals("dd"))
			{
				indentStack.push(new IndexType("dd"));
				newLine();
			}
		}
		
		
		/**
		 * Starts a new line, indented by one space per open indenting
		 * element.
		 */
		void newLine()
		{
			stringBuffer.append("\n");
			for(int i = 0; i < indentStack.size(); i++)
			{
				stringBuffer.append(" ");
			}
		}
		
		
		/**
		 * Closes the innermost indenting element, tolerating unbalanced end
		 * tags.
		 */
		private void popIndent()
		{
			if(!indentStack.isEmpty())
			{
				indentStack.pop();
			}
		}
		
		
		@Override
		public void handleEndTag(HTML.Tag t, int pos)
		{
			String tag = t.toString();
			
			if(tag.equals("p"))
			{
				newLine();
			}
			else if(tag.equals("ol") || tag.equals("ul") || tag.equals("li"))
			{
				popIndent();
				newLine();
			}
			else if(tag.equals("dd"))
			{
				popIndent();
			}
		}
		
		
		@Override
		public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
		{
			if(t.toString().equals("br"))
			{
				newLine();
			}
		}
		
		
		@Override
		public void handleText(char[] text, int pos)
		{
			stringBuffer.append(text);
		}
		
		
		/**
		 * @return the plain text collected so far
		 */
		String getText()
		{
			return stringBuffer.toString();
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy