
// com.sta.cts.HTML2XML (Maven / Gradle / Ivy)
package com.sta.cts;
import java.util.Enumeration;
import java.util.Hashtable;
import java.io.IOException;
import com.sta.mlogger.MLogger;
/**
 * Name: HTML2XML
 * Description: Helper class that converts HTML into XML (not XHTML!).
 *
 * Comment: Tokens are read from an {@link XMLScanner} and written to an
 * {@link XMLGenerator}. Tags registered as empty tags (meta, link, br, img,
 * col, hr) are closed right after they were opened. A missing or mismatching
 * closing tag is reported as a warning via MLogger and the conversion goes on.
 *
 * Copyright: Copyright (c) 2016, 2018, 2019
 * Company: >StA-Soft<
 * @author StA
 * @version 1.0
 */
public class HTML2XML
{
  /**
   * Tags that (may) occur as empty tags, i.e. without a closing tag.
   * Only the keys are used; the value 1 is a placeholder.
   */
  private static final Hashtable<String, Integer> statEmptyTags = new Hashtable<String, Integer>();

  static
  {
    statEmptyTags.put("meta", 1);
    statEmptyTags.put("link", 1);
    statEmptyTags.put("br", 1);
    statEmptyTags.put("img", 1);
    statEmptyTags.put("col", 1);
    statEmptyTags.put("hr", 1);
  }

  /**
   * Entities: entity name mapped to its character code.
   */
  private static final Hashtable<String, Integer> statEntities = new Hashtable<String, Integer>();

  /**
   * Prepare the entities.
   */
  static
  {
    // https://wiki.selfhtml.org/wiki/Referenz:HTML/Zeichenreferenz
    // The position in this array is significant: entry i is registered with
    // the character code 0xa0 + i, so names must neither be reordered nor removed.
    String[] entities = {
      "nbsp",
      "iexcl",
      "cent",
      "pound",
      "curren",
      "yen",
      "brvbar",
      "sect",
      "uml",
      "copy",
      "ordf",
      "laquo",
      "not",
      "shy",
      "reg",
      "macr",
      "deg",
      "plusmn",
      "sup2",
      "sup3",
      "acute",
      "micro",
      "para",
      "middot",
      "cedil",
      "sup1",
      "ordm",
      "raquo",
      "frac14",
      "frac12",
      "frac34",
      "iquest",
      "Agrave",
      "Aacute",
      "Acirc",
      "Atilde",
      "Auml",
      "Aring",
      "AElig",
      "Ccedil",
      "Egrave",
      "Eacute",
      "Ecirc",
      "Euml",
      "Igrave",
      "Iacute",
      "Icirc",
      "Iuml",
      "ETH",
      "Ntilde",
      "Ograve",
      "Oacute",
      "Ocirc",
      "Otilde",
      "Ouml",
      "times",
      "Oslash",
      "Ugrave",
      "Uacute",
      "Ucirc",
      "Uuml",
      "Yacute",
      "THORN",
      "szlig",
      "agrave",
      "aacute",
      "acirc",
      "atilde",
      "auml",
      "aring",
      "aelig",
      "ccedil",
      "egrave",
      "eacute",
      "ecirc",
      "euml",
      "igrave",
      "iacute",
      "icirc",
      "iuml",
      "eth",
      "ntilde",
      "ograve",
      "oacute",
      "ocirc",
      "otilde",
      "ouml",
      "divide",
      "oslash",
      "ugrave",
      "uacute",
      "ucirc",
      "uuml",
      "yacute",
      "thorn",
      "yuml"
    };
    for (int i = 0; i < entities.length; i++)
    {
      statEntities.put(entities[i], 0xa0 + i);
    }
    statEntities.put("mdash", 0x2014);
  }

  /**
   * Get the entities.
   * The shared table itself is returned, not a copy, so changes made by the
   * caller are visible to every later conversion. The raw return type is kept
   * so that existing callers continue to compile unchanged.
   * @return Entities (entity name mapped to character code)
   */
  public static Hashtable getEntities()
  {
    return statEntities;
  }

  /**
   * Convert HTML to XML.
   * Processes tokens until the input ends, a token that is neither content nor
   * a tag shows up, or a tag that is not an opening tag is found; such a tag is
   * pushed back to the scanner so that the calling level can match it.
   * The method calls itself for the content of every non-empty tag.
   * @param xg XML generator (destination)
   * @param xs XML scanner (source)
   * @throws IOException in case of an error
   */
  public static void runConvHTML2XML(XMLGenerator xg, XMLScanner xs) throws IOException
  {
    while (true)
    {
      // Fetch the next token
      Object obj = xs.getToken();
      // Content (strings) is simply copied
      while (obj instanceof String)
      {
        xg.putContent((String) obj);
        obj = xs.getToken();
      }
      // End of input (null) or not an XMLTag: done
      if (!(obj instanceof XMLTag))
      {
        return;
      }
      // The token is an XMLTag
      XMLTag tag = (XMLTag) obj;
      if (!tag.isOpen())
      {
        // Not an opening tag: hand it back so the calling level can check it
        xs.ungetToken(tag);
        return;
      }
      // Opening tag: open the tag in the destination as well
      String tagname = tag.getName();
      xg.openTag(tagname);
      copyAttributes(xg, tag);
      if (tag.isClose())
      {
        // Tag is opening and closing at once: push it back as a closing tag
        tag.resOpen();
        xs.ungetToken(tag);
      }
      else if (statEmptyTags.containsKey(tagname))
      {
        // Known empty tag: turn it into its own closing tag and push it back
        tag.resOpen();
        tag.setClose();
        xs.ungetToken(tag);
      }
      else
      {
        // Convert the content, if there is any
        runConvHTML2XML(xg, xs);
      }
      // Now a closing tag with the same name has to follow
      xg.closeTag(tagname);
      expectClosingTag(xs, tagname);
    }
  }

  /**
   * Copy all attributes of a tag to the tag currently open in the generator.
   * @param xg XML generator (destination)
   * @param tag tag whose attributes are copied
   * @throws IOException in case of an error
   */
  private static void copyAttributes(XMLGenerator xg, XMLTag tag) throws IOException
  {
    Enumeration<?> names = tag.getAttrNames();
    while ((names != null) && names.hasMoreElements())
    {
      String attrname = (String) names.nextElement();
      xg.putAttr(attrname, tag.getAttr(attrname));
    }
  }

  /**
   * Consume the closing tag that has to follow, or log a warning.
   * If the next token is not the closing tag for the given name, it is pushed
   * back to the scanner (unless the input has ended) and a warning is logged;
   * no exception is raised for this.
   * @param xs XML scanner (source)
   * @param tagname name of the tag that has to be closed
   * @throws IOException in case of an error
   */
  private static void expectClosingTag(XMLScanner xs, String tagname) throws IOException
  {
    // Spell out the complete closing tag in the warning, e.g. "</p> expected."
    String msg = "</" + tagname + "> expected.";
    Object obj = xs.getToken();
    if (obj == null)
    {
      MLogger.wrn(msg + " Unexpected end of file.");
      return;
    }
    if (!(obj instanceof XMLTag))
    {
      xs.ungetToken(obj);
      MLogger.wrn(msg + " Unexpected: " + obj.toString());
      return;
    }
    XMLTag tag = (XMLTag) obj;
    if (tag.isOpen() || !tag.isClose() || !tag.getName().equals(tagname))
    {
      xs.ungetToken(obj);
      MLogger.wrn(msg + " Found: <" + tag.getName() + "...>.");
    }
  }

  /**
   * Convert HTML to XML.
   * The scanner is set up with the entity table of this class. Generator and
   * scanner are closed again even if the conversion fails.
   * @param pDstFileName name of the XML file to create (destination)
   * @param pSrcFileName name of the HTML file to read (source)
   * @throws IOException in case of an error
   */
  public static void runConvHTML2XML(String pDstFileName, String pSrcFileName) throws IOException
  {
    XMLScanner xs = new XMLScanner();
    xs.setEntities(statEntities);
    xs.init(pSrcFileName);
    try
    {
      XMLGenerator xg = new XMLGenerator();
      xg.createXML(pDstFileName);
      try
      {
        // NOTE(review): disabled direct output, kept for reference; the "]>"
        // suggests it once wrote a document type prolog - confirm before reviving.
        // xg.directWriteLn("");
        // xg.directWriteLn("]>");
        runConvHTML2XML(xg, xs);
      }
      finally
      {
        xg.closeXML();
      }
    }
    finally
    {
      xs.done();
    }
  }

  //===========================================================================

  /**
   * Dummy constructor.
   */
  protected HTML2XML()
  {
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy