All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.htmlcleaner.EpublibXmlSerializer Maven / Gradle / Ivy

The newest version!
package org.htmlcleaner;

import java.io.IOException;
import java.io.Writer;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

public class EpublibXmlSerializer extends SimpleXmlSerializer {
	private String outputEncoding;
	
	public EpublibXmlSerializer(CleanerProperties paramCleanerProperties, String outputEncoding) {
		super(paramCleanerProperties);
		this.outputEncoding = outputEncoding;
	}

	protected String escapeXml(String xmlContent) {
		return xmlContent;
    }

	/**
	 * Differs from the super.serializeOpenTag in that it:
	 * 
    *
  • skips the xmlns:xml="xml" attribute
  • *
  • if the tagNode is a meta tag setting the contentType then it sets the encoding to the actual encoding
  • *
*/ protected void serializeOpenTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException { String tagName = tagNode.getName(); if (Utils.isEmptyString(tagName)) { return; } boolean nsAware = props.isNamespacesAware(); Set definedNSPrefixes = null; Set additionalNSDeclNeeded = null; String tagPrefix = Utils.getXmlNSPrefix(tagName); if (tagPrefix != null) { if (nsAware) { definedNSPrefixes = new HashSet(); tagNode.collectNamespacePrefixesOnPath(definedNSPrefixes); if ( !definedNSPrefixes.contains(tagPrefix) ) { additionalNSDeclNeeded = new TreeSet(); additionalNSDeclNeeded.add(tagPrefix); } } else { tagName = Utils.getXmlName(tagName); } } writer.write("<" + tagName); if (isMetaContentTypeTag(tagNode)) { tagNode.setAttribute("content", "text/html; charset=" + outputEncoding); } // write attributes for (Map.Entry entry: tagNode.getAttributes().entrySet()) { String attName = entry.getKey(); String attPrefix = Utils.getXmlNSPrefix(attName); if (attPrefix != null) { if (nsAware) { // collect used namespace prefixes in attributes in order to explicitly define // ns declaration if needed; otherwise it would be ill-formed xml if (definedNSPrefixes == null) { definedNSPrefixes = new HashSet(); tagNode.collectNamespacePrefixesOnPath(definedNSPrefixes); } if ( !definedNSPrefixes.contains(attPrefix) ) { if (additionalNSDeclNeeded == null) { additionalNSDeclNeeded = new TreeSet(); } additionalNSDeclNeeded.add(attPrefix); } } else { attName = Utils.getXmlName(attName); } } writer.write(" " + attName + "=\"" + escapeXml(entry.getValue()) + "\""); } // write namespace declarations if (nsAware) { Map nsDeclarations = tagNode.getNamespaceDeclarations(); if (nsDeclarations != null) { for (Map.Entry entry: nsDeclarations.entrySet()) { String prefix = entry.getKey(); String att = "xmlns"; if (prefix.length() > 0) { att += ":" + prefix; } writer.write(" " + att + "=\"" + escapeXml(entry.getValue()) + "\""); } } } // write additional namespace declarations needed for this tag in order xml to be well-formed if (additionalNSDeclNeeded != null) { for (String prefix: additionalNSDeclNeeded) { // skip the xmlns:xml="xml" attribute if (prefix.equalsIgnoreCase("xml")) { continue; } writer.write(" xmlns:" + prefix + "=\"" + prefix + "\""); } } if ( isMinimizedTagSyntax(tagNode) ) { writer.write(" />"); if (newLine) { writer.write("\n"); } } else if (dontEscape(tagNode)) { writer.write(">"); } } private boolean isMetaContentTypeTag(TagNode tagNode) { return tagNode.getName().equalsIgnoreCase("meta") && "Content-Type".equalsIgnoreCase(tagNode.getAttributeByName("http-equiv")); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy