
// net.jangaroo.exml.configconverter.util.TidyComment (Maven / Gradle / Ivy)
package net.jangaroo.exml.configconverter.util;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.ByteArrayInputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
/**
* A helper class to convert HTML-style comments into well-formed XHTML.
*/
public final class TidyComment {
/**
 * The single {@link Tidy} instance used by {@link #tidy(String)}. It is created
 * and configured exactly once, in the static initializer below.
 */
private static final Tidy TIDY;

static {
  // Configure a local instance first; the constant is assigned only once
  // every option has been applied.
  final Tidy configured = new Tidy();

  // Markup clean-up switches.
  configured.setDropEmptyParas(true);
  configured.setDropFontTags(true);
  configured.setFixComments(true);
  configured.setHideEndTags(false);
  configured.setIndentAttributes(true);
  configured.setMakeClean(true);

  // Reporting and escaping switches: quiet mode on, warnings off,
  // ampersand quoting on.
  configured.setQuiet(true);
  configured.setQuoteAmpersand(true);
  configured.setShowWarnings(false);

  // Output format switches: XHTML and XML output enabled, the xml:space and
  // XML processing-instruction options disabled.
  configured.setXHTML(true);
  configured.setXmlOut(true);
  configured.setXmlSpace(false);
  configured.setXmlPi(false);

  TIDY = configured;
}

/**
 * Private constructor: this class cannot be instantiated from outside.
 */
private TidyComment() {
}
public static String tidy(String dirtyHtml) {
String wrappedHtml = ""+dirtyHtml+"";
StringWriter result = new StringWriter();
try {
Document document = TIDY.parseDOM(new ByteArrayInputStream(wrappedHtml.getBytes("ISO-8859-1")), null);
DOMSource domSource = new DOMSource(document.getDocumentElement());
Transformer serializer = TransformerFactory.newInstance().newTransformer();
serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD XHTML 1.0 Transitional//EN");
serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");
serializer.transform(domSource, new StreamResult(result));
} catch (TransformerException e) {
throw new RuntimeException(e);
} catch (UnsupportedEncodingException e) {
// should not happen for ISO-8859-1:
throw new RuntimeException(e);
}
String xml = result.toString();
if (xml.indexOf("
")!=-1) {
return "";
}
int bodyStart = xml.indexOf("");
if(bodyEnd == -1) {
xml += "";
bodyEnd = xml.indexOf("