net.sf.saxon.serialize.HTMLIndenter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of Saxon-HE Show documentation
Show all versions of Saxon-HE Show documentation
The XSLT and XQuery Processor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2015 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.serialize;
import net.sf.saxon.event.ProxyReceiver;
import net.sf.saxon.event.Receiver;
import net.sf.saxon.event.ReceiverOptions;
import net.sf.saxon.expr.parser.ExplicitLocation;
import net.sf.saxon.expr.parser.Location;
import net.sf.saxon.lib.NamespaceConstant;
import net.sf.saxon.om.FingerprintedQName;
import net.sf.saxon.om.NodeName;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.tiny.CharSlice;
import net.sf.saxon.type.SchemaType;
import java.util.Arrays;
import java.util.HashSet;
/**
* HTMLIndenter: This ProxyEmitter indents HTML elements, by adding whitespace
* character data where appropriate.
* The character data is never added when within an inline element.
* The string used for indentation defaults to three spaces
*
* @author Michael Kay
*/
public class HTMLIndenter extends ProxyReceiver {
private int level = 0;
protected char[] indentChars = {'\n', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '};
private boolean sameLine = false;
private boolean inFormattedTag = false;
private boolean afterInline = false;
private boolean afterFormatted = true; // to prevent a newline at the start
/*@NotNull*/ private int[] propertyStack = new int[20];
private NameClassifier classifier;
/**
* The NameClassifier classifies element names according to whether the element is (a) an inline element,
* and/or (b) a formatted element
*/
interface NameClassifier {
int IS_INLINE = 1;
int IS_FORMATTED = 2;
/**
* Classify an element name as inline, formatted, or both or neither.
* This method is overridden in the XHTML indenter
*
* @param name the element name
* @return a bit-significant integer containing flags IS_INLINE and/or IS_FORMATTED
*/
int classifyTag(NodeName name);
}
final private static String[] inlineTags = {
"tt", "i", "b", "u", "s", "strike", "big", "small", "em", "strong", "dfn", "code", "samp",
"kbd", "var", "cite", "abbr", "acronym", "a", "img", "applet", "object", "font",
"basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe", "input",
"select", "textarea", "label", "button", "ins", "del"};
// INS and DEL are not actually inline elements, but the SGML DTD for HTML
// (apparently) permits them to be used as if they were.
final static String[] formattedTags = {"pre", "script", "style", "textarea", "xmp"};
// "xmp" is obsolete but still encountered!
/**
* Class to classify HTML names
*/
static class HTMLNameClassifier implements NameClassifier {
// the list of inline tags is from the HTML 4.0 (loose) spec. The significance is that we
// mustn't add spaces immediately before or after one of these elements.
final static HTMLNameClassifier THE_INSTANCE = new HTMLNameClassifier();
private static HTMLTagHashSet inlineTable = new HTMLTagHashSet(101);
static {
for (String inlineTag : inlineTags) {
inlineTable.add(inlineTag);
}
}
/**
* Classify an element name as inline, formatted, or both or neither.
* This method is overridden in the XHTML indenter
*
* @param elemName the element name
* @return a bit-significant integer containing flags IS_INLINE and/or IS_FORMATTED
*/
public int classifyTag(NodeName elemName) {
int r = 0;
String tag = elemName.getDisplayName();
if (inlineTable.contains(tag)) {
r |= IS_INLINE;
}
if (formattedTable.contains(tag)) {
r |= IS_FORMATTED;
}
return r;
}
// Table of preformatted elements
private static HTMLTagHashSet formattedTable = new HTMLTagHashSet(23);
static {
for (String formattedTag : formattedTags) {
formattedTable.add(formattedTag);
}
}
}
/**
* Class to classify XHTML names
*/
static class XHTMLNameClassifier implements NameClassifier {
final static XHTMLNameClassifier THE_INSTANCE = new XHTMLNameClassifier();
private final static HashSet inlineTagSet;
private final static HashSet formattedTagSet;
static {
inlineTagSet = new HashSet(50);
formattedTagSet = new HashSet(10);
for (String inlineTag : inlineTags) {
inlineTagSet.add(new FingerprintedQName("", NamespaceConstant.XHTML, inlineTag));
}
for (String formattedTag : formattedTags) {
formattedTagSet.add(new FingerprintedQName("", NamespaceConstant.XHTML, formattedTag));
}
}
/**
* Classify an element name as inline, formatted, or both or neither.
* This method is overridden in the XHTML indenter
*
* @param name the element name
* @return a bit-significant integer containing flags IS_INLINE and/or IS_FORMATTED
*/
public int classifyTag(NodeName name) {
int r = 0;
if (inlineTagSet.contains(name)) {
r |= IS_INLINE;
}
if (formattedTagSet.contains(name)) {
r |= IS_FORMATTED;
}
return r;
}
}
public HTMLIndenter(Receiver next, String method) {
super(next);
if ("xhtml".equals(method)) {
classifier = XHTMLNameClassifier.THE_INSTANCE;
} else {
classifier = HTMLNameClassifier.THE_INSTANCE;
}
}
/**
* Output element start tag
*/
public void startElement(NodeName nameCode, SchemaType typeCode, Location location, int properties) throws XPathException {
int tagProps = classifier.classifyTag(nameCode);
if (level >= propertyStack.length) {
int[] p2 = new int[level * 2];
System.arraycopy(propertyStack, 0, p2, 0, propertyStack.length);
propertyStack = p2;
}
propertyStack[level] = tagProps;
boolean inlineTag = (tagProps & NameClassifier.IS_INLINE) != 0;
inFormattedTag = inFormattedTag || ((tagProps & NameClassifier.IS_FORMATTED) != 0);
if (!inlineTag && !inFormattedTag &&
!afterInline && !afterFormatted) {
indent();
}
nextReceiver.startElement(nameCode, typeCode, location, properties);
level++;
sameLine = true;
afterInline = false;
afterFormatted = false;
}
/**
* Output element end tag
*/
public void endElement() throws XPathException {
level--;
boolean thisInline = (propertyStack[level] & NameClassifier.IS_INLINE) != 0;
boolean thisFormatted = (propertyStack[level] & NameClassifier.IS_FORMATTED) != 0;
if (!thisInline && !thisFormatted && !afterInline &&
!sameLine && !afterFormatted && !inFormattedTag) {
indent();
afterInline = false;
afterFormatted = false;
} else {
afterInline = thisInline;
afterFormatted = thisFormatted;
}
nextReceiver.endElement();
inFormattedTag = inFormattedTag && !thisFormatted;
sameLine = false;
}
/**
* Output character data
*/
public void characters(CharSequence chars, Location locationId, int properties) throws XPathException {
if (inFormattedTag ||
(properties & ReceiverOptions.USE_NULL_MARKERS) != 0 ||
(properties & ReceiverOptions.DISABLE_ESCAPING) != 0) {
// don't split the text if in a tag such as , or if the text contains the result of
// expanding a character map or was produced using disable-output-escaping
nextReceiver.characters(chars, locationId, properties);
} else {
// otherwise try to split long lines into multiple lines
int lastNL = 0;
for (int i = 0; i < chars.length(); i++) {
if (chars.charAt(i) == '\n' || (i - lastNL > getLineLength() && chars.charAt(i) == ' ')) {
sameLine = false;
nextReceiver.characters(chars.subSequence(lastNL, i), locationId, properties);
indent();
lastNL = i + 1;
while (lastNL < chars.length() && chars.charAt(lastNL) == ' ') {
lastNL++;
}
}
}
if (lastNL < chars.length()) {
nextReceiver.characters(chars.subSequence(lastNL, chars.length()), locationId, properties);
}
}
afterInline = false;
}
/**
* Output a comment
*/
public void comment(CharSequence chars, Location locationId, int properties) throws XPathException {
indent();
nextReceiver.comment(chars, locationId, properties);
}
/**
* Output white space to reflect the current indentation level
*
* @throws net.sf.saxon.trans.XPathException
* if an error occurs downstream in the pipeline
*/
private void indent() throws XPathException {
int spaces = level * getIndentation();
if (spaces + 1 >= indentChars.length) {
int increment = 5 * getIndentation();
if (spaces + 1 > indentChars.length + increment) {
increment += spaces + 1;
}
char[] c2 = new char[indentChars.length + increment];
System.arraycopy(indentChars, 0, c2, 0, indentChars.length);
Arrays.fill(c2, indentChars.length, c2.length, ' ');
indentChars = c2;
}
nextReceiver.characters(new CharSlice(indentChars, 0, spaces + 1), ExplicitLocation.UNKNOWN_LOCATION, 0);
sameLine = false;
}
/**
* Get the number of spaces to be used for indentation
*
* @return the number of spaces to be added to the indentation for each level
*/
protected int getIndentation() {
return 3;
}
/**
* Get the maximum length of lines, after which long lines will be word-wrapped
*
* @return the maximum line length
*/
protected int getLineLength() {
return 80;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy