net.sf.saxon.serialize.XHTML5Emitter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of Saxon-HE Show documentation
Show all versions of Saxon-HE Show documentation
The XSLT and XQuery Processor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.serialize;
import net.sf.saxon.om.NamespaceUri;
import net.sf.saxon.om.NodeName;
import net.sf.saxon.s9api.Location;
import net.sf.saxon.str.StringConstants;
import net.sf.saxon.str.UnicodeString;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.value.Whitespace;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
/**
 * XHTML5Emitter is an Emitter that generates XHTML 5 output.
 * It is the same as XMLEmitter except that it follows the legacy HTML browser
 * compatibility rules: for example, generating empty elements such as {@code <br />}, and
 * using {@code <p></p>} for empty paragraphs rather than {@code <p/>}.
 */
public class XHTML5Emitter extends XMLEmitter {

    /** Local names that are treated as HTML5 elements when they appear in no namespace. */
    private static final String[] html5ElementNames = {
            "a", "abbr", "address", "area", "article", "aside", "audio",
            "b", "base", "bdi", "bdo", "blockquote", "body", "br", "button",
            "canvas", "caption", "cite", "code", "col", "colgroup", /*"command",*/
            "datalist", "dd", "del", "details", "dfn", "dialog", "div", "dl", "dt",
            "em", "embed",
            "fieldset", "figcaption", "figure", "footer", "form",
            "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
            "i", "iframe", "img", "input", "ins",
            "kbd", "keygen",
            "label", "legend", "li", "link",
            "map", "mark", "menu", "meta", "meter",
            "nav", "noscript",
            "object", "ol", "optgroup", "option", "output",
            "p", "param", "pre", "progress",
            "q",
            "rp", "rt", "ruby",
            "s", "samp", "script", "section", "select", "small", "source", "span", "strong", "style", "sub", "summary", "sup",
            "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "time", "title", "tr", "track",
            "u", "ul",
            "var", "video",
            "wbr"
    };

    /** Lookup set built from {@link #html5ElementNames}; all entries are lower case. */
    static final Set<String> html5Elements = new HashSet<>(128);

    /** Lookup set built from {@link #emptyTagNames5}; all entries are lower case. */
    static final Set<String> emptyTags5 = new HashSet<>(31);

    /** Local names of the elements that are closed with the short empty-tag form. */
    private static final String[] emptyTagNames5 = {
            "area", "base", "br", "col", /*"command",*/ "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
            "source", "track", "wbr"
    };

    static {
        Collections.addAll(emptyTags5, emptyTagNames5);
        Collections.addAll(html5Elements, html5ElementNames);
    }

    /**
     * Lower-case the local part of a name independently of the JVM default locale.
     * Using the default locale would, for example, turn "IMG" into a string containing
     * a dotless i under a Turkish locale, so that it no longer matches the ASCII tables.
     *
     * @param name the element name
     * @return the local part of the name, lower-cased using {@link Locale#ROOT}
     */
    private static String lowerCaseLocalPart(NodeName name) {
        return name.getLocalPart().toLowerCase(Locale.ROOT);
    }

    /**
     * Test whether an element name is to be treated as an HTML element.
     *
     * @param name the element name
     * @return true if the name is in the XHTML namespace, or if it is in no namespace
     *         and its local part, compared case-insensitively, is one of the names
     *         in {@link #html5Elements}
     */
    private boolean isRecognizedHtmlElement(NodeName name) {
        if (name.hasURI(NamespaceUri.XHTML)) {
            return true;
        }
        return name.hasURI(NamespaceUri.NULL) && html5Elements.contains(lowerCaseLocalPart(name));
    }

    /**
     * Output the document type declaration.
     *
     * <p>If a system identifier is supplied, the declaration is written by the superclass.
     * If none is supplied, a declaration consisting of the inherited {@code DOCTYPE} text,
     * the displayed element name and a closing {@code '>'} is written, but only when the
     * element is a recognized HTML element whose local name is "html" (in any case).
     * Otherwise nothing is written.</p>
     *
     * @param name        the qualified name of the element
     * @param displayName The element name as displayed
     * @param systemId    The DOCTYPE system identifier
     * @param publicId    The DOCTYPE public identifier
     * @throws net.sf.saxon.trans.XPathException
     *          if an error occurs writing to the output
     */
    @Override
    protected void writeDocType(NodeName name, String displayName, String systemId, String publicId) throws XPathException {
        if (systemId != null) {
            super.writeDocType(name, displayName, systemId, publicId);
            return;
        }
        if (isRecognizedHtmlElement(name) && "html".equals(lowerCaseLocalPart(name))) {
            try {
                writer.writeAscii(DOCTYPE);
                writer.write(displayName);
                writer.writeCodePoint('>');
            } catch (IOException e) {
                throw new XPathException(e);
            }
        }
    }

    /**
     * Always returns true.
     * NOTE(review): presumably this tells the superclass that a DOCTYPE declaration may be
     * written even when no system identifier is present - confirm against XMLEmitter.
     *
     * @return true
     */
    @Override
    protected boolean writeDocTypeWithNullSystemId() {
        return true;
    }

    /**
     * Close an empty element tag.
     *
     * <p>Recognized HTML elements whose local name is in {@link #emptyTags5} are closed with
     * {@code StringConstants.EMPTY_TAG_END}; every other element is closed by writing
     * {@code StringConstants.EMPTY_TAG_MIDDLE}, the displayed name and {@code '>'},
     * i.e. as a start tag immediately followed by an explicit end tag.</p>
     *
     * @param displayName the element name as displayed
     * @param name        the element name
     * @throws IOException if an error occurs writing to the output
     */
    @Override
    protected void writeEmptyElementTagCloser(String displayName, /*@NotNull*/ NodeName name) throws IOException {
        // NOTE(review): this lookup is case-sensitive, whereas no-namespace names are
        // recognized case-insensitively above, so a no-namespace "BR" gets an explicit
        // end tag. Kept as is; confirm whether that is intended.
        if (isRecognizedHtmlElement(name) && emptyTags5.contains(name.getLocalPart())) {
            writer.writeAscii(StringConstants.EMPTY_TAG_END);
        } else {
            writer.writeAscii(StringConstants.EMPTY_TAG_MIDDLE);
            writer.write(displayName);
            writer.writeCodePoint('>');
        }
    }

    /**
     * Character data.
     *
     * <p>Whitespace-only text arriving before the first start tag is dropped. This isn't
     * explicit in the spec, but we would otherwise need to buffer such whitespace, because
     * we need to output a DOCTYPE declaration based on the content of the first element tag.</p>
     *
     * @param chars      the characters to be written
     * @param locationId the location associated with the text
     * @param properties properties passed through unchanged to the superclass
     * @throws XPathException if the superclass reports an error
     */
    @Override
    public void characters(UnicodeString chars, Location locationId, int properties) throws XPathException {
        if (started || !Whitespace.isAllWhite(chars)) {
            super.characters(chars, locationId, properties);
        }
    }
}