All downloads are free. Search and download functionality uses the official Maven repository.

com.sta.cts.HTML2XML Maven / Gradle / Ivy


package com.sta.cts;

import java.util.Enumeration;
import java.util.Hashtable;

import java.io.IOException;

import com.sta.mlogger.MLogger;

/**
 * 

Name: HTML2XML

*

Description: Hilfsklasse, um HTML nach XML (nicht XHTML!) zu konvertieren. *

*

Comment: ... *

*

Copyright: Copyright (c) 2016, 2018, 2019

*

Company: >StA-Soft<

* @author StA * @version 1.0 */ public class HTML2XML { /** * Tags, die als leere Tags vorkommen (k?nnen). */ private static Hashtable statEmptyTags = new Hashtable(); static { statEmptyTags.put("meta", 1); statEmptyTags.put("link", 1); statEmptyTags.put("br", 1); statEmptyTags.put("img", 1); statEmptyTags.put("col", 1); statEmptyTags.put("hr", 1); } /** * Entities. */ private static Hashtable statEntities = new Hashtable(); /** * Entities vorbereiten. */ static { // https://wiki.selfhtml.org/wiki/Referenz:HTML/Zeichenreferenz String[] entities = { "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect", "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr", "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot", "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest", "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig", "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil", "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml", "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide", "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml" }; for (int i = 0; i < entities.length; i++) { statEntities.put(entities[i], 0xa0 + i); } statEntities.put("mdash", 0x2014); } /** * Entities ermitteln. * @return Entities */ public static Hashtable getEntities() { return statEntities; } /** * HTML nach XML konvertieren. 
* @param xg XML-Generator (Ziel) * @param xs XML-Scanner (Quelle) * @throws IOException im Fehlerfall */ public static void runConvHTML2XML(XMLGenerator xg, XMLScanner xs) throws IOException { while (true) { // Token holen Object obj = xs.getToken(); // Falls es sich um Inhalt (Strings) handelt: einfach kopieren while ((obj != null) && (obj instanceof String)) { xg.putContent((String) obj); obj = xs.getToken(); } // Falls Dateiende oder kein XMLTag: Ende if ((obj == null) || (!(obj instanceof XMLTag))) { return; } // Token ist ein XMLTag XMLTag tag = (XMLTag) obj; if (!tag.isOpen()) { xs.ungetToken(tag); break; } // Falls ?ffnendes Tag: Tag auch in der Zieldatei ?ffnen String tagname = tag.getName(); xg.openTag(tagname); // Alle Attribute kopieren Enumeration e = tag.getAttrNames(); while ((e != null) && e.hasMoreElements()) { String attrname = (String) e.nextElement(); String attrvalue = tag.getAttr(attrname); xg.putAttr(attrname, attrvalue); } // Falls auch schlie?endes Tag: Token zur?cklegen if (tag.isClose()) { tag.resOpen(); xs.ungetToken(tag); } else if (statEmptyTags.containsKey(tagname)) { tag.resOpen(); tag.setClose(); xs.ungetToken(tag); } else { // Inhalt konvertieren, falls vorhanden runConvHTML2XML(xg, xs); } // Jetzt mu? ein schlie?endes Tag mit gleichem Namen kommen xg.closeTag(tagname); String msg = " expected."; obj = xs.getToken(); if (obj == null) { MLogger.wrn(msg + " Unexpected end of file."); continue; } if (!(obj instanceof XMLTag)) { xs.ungetToken(obj); MLogger.wrn(msg + " Unexpected: " + obj.toString()); continue; } tag = (XMLTag) obj; if (tag.isOpen() || !tag.isClose() || !tag.getName().equals(tagname)) { xs.ungetToken(obj); MLogger.wrn(msg + " Found: <" + tag.getName() + "...>."); continue; } } } /** * HTML nach XML konvertieren. 
* @param pDstFileName HTML-Dateiname * @param pSrcFileName XML-Dateiname * @throws IOException im Fehlerfall */ public static void runConvHTML2XML(String pDstFileName, String pSrcFileName) throws IOException { XMLScanner xs = new XMLScanner(); xs.setEntities(statEntities); xs.init(pSrcFileName); try { XMLGenerator xg = new XMLGenerator(); xg.createXML(pDstFileName); try { // xg.directWriteLn(""); // xg.directWriteLn("]>"); runConvHTML2XML(xg, xs); } finally { xg.closeXML(); } } finally { xs.done(); } } //=========================================================================== /** * Dummy-Constructor. */ protected HTML2XML() { } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy