org.htmlcleaner.EpublibXmlSerializer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of epublib-tools Show documentation
Show all versions of epublib-tools Show documentation
A Java library for reading, writing, and manipulating EPUB files
The newest version!
package org.htmlcleaner;
import java.io.IOException;
import java.io.Writer;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
 * XML serializer variant of {@link SimpleXmlSerializer} with a customised open-tag writer.
 *
 * <p>Attribute values and namespace URIs pass through {@link #escapeXml(String)}, which this
 * class overrides to return its input unchanged, so they are written verbatim.
 */
public class EpublibXmlSerializer extends SimpleXmlSerializer {

    /** Charset name inserted into the {@code content} attribute of Content-Type meta tags. */
    private final String outputEncoding;

    /**
     * Creates a serializer.
     *
     * @param paramCleanerProperties cleaner properties handed to the superclass
     * @param outputEncoding charset name to advertise in Content-Type meta tags
     */
    public EpublibXmlSerializer(CleanerProperties paramCleanerProperties, String outputEncoding) {
        super(paramCleanerProperties);
        this.outputEncoding = outputEncoding;
    }

    /**
     * Returns the given content unchanged; this serializer performs no escaping.
     *
     * @param xmlContent the text to be written
     * @return {@code xmlContent}, untouched
     */
    protected String escapeXml(String xmlContent) {
        return xmlContent;
    }

    /**
     * Differs from the super.serializeOpenTag in that it:
     * <ul>
     * <li>skips the xmlns:xml="xml" attribute</li>
     * <li>if the tagNode is a meta tag setting the contentType then it sets the encoding to the
     * actual encoding</li>
     * </ul>
     *
     * <p>Note that the second point is implemented by overwriting the {@code content} attribute
     * on {@code tagNode} itself, so the node is modified as a side effect of serialization.
     *
     * @param tagNode the element whose opening tag is written; nothing is written when its name
     *     is empty
     * @param writer destination of the serialized markup
     * @param newLine whether a line break follows a minimized ({@code />}) tag
     * @throws IOException if writing to {@code writer} fails
     */
    protected void serializeOpenTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException {
        String tagName = tagNode.getName();
        if (Utils.isEmptyString(tagName)) {
            return;
        }

        boolean nsAware = props.isNamespacesAware();

        // Both sets are created lazily: most tags carry no namespace prefix at all.
        Set<String> definedNSPrefixes = null;
        Set<String> additionalNSDeclNeeded = null;

        String tagPrefix = Utils.getXmlNSPrefix(tagName);
        if (tagPrefix != null) {
            if (nsAware) {
                definedNSPrefixes = new HashSet<String>();
                tagNode.collectNamespacePrefixesOnPath(definedNSPrefixes);
                if (!definedNSPrefixes.contains(tagPrefix)) {
                    // TreeSet keeps the extra declarations in a stable, sorted order.
                    additionalNSDeclNeeded = new TreeSet<String>();
                    additionalNSDeclNeeded.add(tagPrefix);
                }
            } else {
                // Not namespace aware: drop the prefix and emit the local name only.
                tagName = Utils.getXmlName(tagName);
            }
        }

        writer.write("<" + tagName);

        // Must happen before the attribute loop so the rewritten value is the one serialized.
        if (isMetaContentTypeTag(tagNode)) {
            tagNode.setAttribute("content", "text/html; charset=" + outputEncoding);
        }

        // write attributes
        for (Map.Entry<String, String> entry : tagNode.getAttributes().entrySet()) {
            String attName = entry.getKey();
            String attPrefix = Utils.getXmlNSPrefix(attName);
            if (attPrefix != null) {
                if (nsAware) {
                    // collect used namespace prefixes in attributes in order to explicitly define
                    // ns declaration if needed; otherwise it would be ill-formed xml
                    if (definedNSPrefixes == null) {
                        definedNSPrefixes = new HashSet<String>();
                        tagNode.collectNamespacePrefixesOnPath(definedNSPrefixes);
                    }
                    if (!definedNSPrefixes.contains(attPrefix)) {
                        if (additionalNSDeclNeeded == null) {
                            additionalNSDeclNeeded = new TreeSet<String>();
                        }
                        additionalNSDeclNeeded.add(attPrefix);
                    }
                } else {
                    attName = Utils.getXmlName(attName);
                }
            }
            writer.write(" " + attName + "=\"" + escapeXml(entry.getValue()) + "\"");
        }

        // write namespace declarations
        if (nsAware) {
            Map<String, String> nsDeclarations = tagNode.getNamespaceDeclarations();
            if (nsDeclarations != null) {
                for (Map.Entry<String, String> entry : nsDeclarations.entrySet()) {
                    String prefix = entry.getKey();
                    String att = "xmlns";
                    // An empty prefix denotes the default namespace: plain "xmlns".
                    if (prefix.length() > 0) {
                        att += ":" + prefix;
                    }
                    writer.write(" " + att + "=\"" + escapeXml(entry.getValue()) + "\"");
                }
            }
        }

        // write additional namespace declarations needed for this tag in order xml to be well-formed
        if (additionalNSDeclNeeded != null) {
            for (String prefix : additionalNSDeclNeeded) {
                // skip the xmlns:xml="xml" attribute
                if (prefix.equalsIgnoreCase("xml")) {
                    continue;
                }
                // The prefix doubles as a placeholder namespace URI.
                writer.write(" xmlns:" + prefix + "=\"" + prefix + "\"");
            }
        }

        if (isMinimizedTagSyntax(tagNode)) {
            writer.write(" />");
            if (newLine) {
                writer.write("\n");
            }
        } else if (dontEscape(tagNode)) {
            // NOTE(review): same ending as the stock HtmlCleaner XmlSerializer, whose inherited
            // serializeEndTag is expected to close the section with "]]>" - confirm against the
            // HtmlCleaner version in use.
            writer.write("><![CDATA[");
        } else {
            // Ordinary element: the start tag still has to be terminated.
            writer.write(">");
        }
    }

    /**
     * Tells whether the node is a {@code meta} element whose {@code http-equiv} attribute is
     * {@code Content-Type} (both compared case-insensitively).
     *
     * @param tagNode the node to inspect
     * @return {@code true} if the node declares the document content type
     */
    private boolean isMetaContentTypeTag(TagNode tagNode) {
        return "meta".equalsIgnoreCase(tagNode.getName())
                && "Content-Type".equalsIgnoreCase(tagNode.getAttributeByName("http-equiv"));
    }
}