All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.serialize.HTMLEmitter Maven / Gradle / Ivy

There is a newer version: 10.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2013 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.serialize;

import net.sf.saxon.event.ReceiverOptions;
import net.sf.saxon.lib.SaxonOutputKeys;
import net.sf.saxon.om.NodeName;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.tiny.CompressedWhitespace;
import net.sf.saxon.type.SchemaType;

import javax.xml.transform.OutputKeys;
import java.util.Stack;

/**
  * This class generates HTML output
  * @author Michael H. Kay
  */

public abstract class HTMLEmitter extends XMLEmitter {

	/**
	* Preferred character representations
	*/

    private static final int REP_NATIVE = 0;
    private static final int REP_ENTITY = 1;
    private static final int REP_DECIMAL = 2;
    private static final int REP_HEX = 3;

    private int nonASCIIRepresentation = REP_NATIVE;
    private int excludedRepresentation = REP_ENTITY;

    // Counter used to detect script/style content: startElement() resets it to zero for a
    // no-namespace script or style element and then increments it, endElement() decrements it,
    // and it is parked at a large negative value whenever it is not in use. While it is
    // positive, characters() passes text through with escaping disabled.
    private int inScript;
    protected int version = 4;
    // Value of elementStack.peek() taken just after the most recent startElement() call;
    // writeAttribute() uses it as the element name when looking up boolean attributes.
    private String parentElement;
    // Namespace URI of the most recently started element.
    private String uri;
    // When true, every non-ASCII character is escaped regardless of the output encoding.
    private boolean escapeNonAscii = false;
    // Names of the currently open elements: pushed by startElement(), popped by endElement().
    // The element type must be declared so that pop() yields a NodeName.
    private final Stack<NodeName> nodeNameStack = new Stack<NodeName>();

	/**
	 * Translate the name of a character representation into its integer code.
	 * Names are matched without regard to case.
	 * @param rep the requested representation (native, entity, decimal, or hex)
	 * @return the corresponding REP_* code; REP_ENTITY if the name is not recognized
	 */

	private static int representationCode(String rep) {
		final int code;
		if (rep.equalsIgnoreCase("native")) {
			code = REP_NATIVE;
		} else if (rep.equalsIgnoreCase("decimal")) {
			code = REP_DECIMAL;
		} else if (rep.equalsIgnoreCase("hex")) {
			code = REP_HEX;
		} else {
			// covers "entity" itself as well as any unrecognized name
			code = REP_ENTITY;
		}
		return code;
	}

    /**
     * The set of HTML element names that are written without a closing tag.
     */

    static HTMLTagHashSet emptyTags = new HTMLTagHashSet(31);

    /**
     * Register an element name as one that has no closing tag.
     * @param tag the element name to add to the table of empty tags
     */

    protected static void setEmptyTag(String tag) {
        emptyTags.add(tag);
    }

    /**
     * Test whether an element name is registered in the table of empty tags.
     * @param tag the element name to look up
     * @return true if the name is present in the table
     */

    protected static boolean isEmptyTag(String tag) {
        final boolean registered = emptyTags.contains(tag);
        return registered;
    }

    /**
     * Tables of boolean attributes.
     */

    // Two sets are maintained: one holding bare attribute names and one holding
    // "element+attribute" keys. The first gives a cheap pre-check, so that the
    // concatenated key is only built for names that are boolean on some element.

    // Sizes must be large enough: this hash set cannot grow beyond the initial size
    private static HTMLTagHashSet booleanAttributes = new HTMLTagHashSet(43);
    private static HTMLTagHashSet booleanCombinations = new HTMLTagHashSet(57);

    static {
        // {element, attribute} pairs; "*" as the element means "any element"
        final String[][] pairs = {
            {"*", "hidden"},                // HTML5
            {"area", "nohref"},
            {"audio", "autoplay"},          // HTML5
            {"audio", "controls"},          // HTML5
            {"audio", "loop"},              // HTML5
            {"audio", "muted"},             // HTML5
            {"button", "disabled"},
            {"button", "autofocus"},        // HTML5
            {"button", "formnovalidate"},   // HTML5
            {"details", "open"},            // HTML5
            {"dialog", "open"},             // HTML5
            {"dir", "compact"},
            {"dl", "compact"},
            {"fieldset", "disabled"},       // HTML5
            {"form", "novalidate"},         // HTML5
            {"frame", "noresize"},
            {"hr", "noshade"},
            {"img", "ismap"},
            {"input", "checked"},
            {"input", "disabled"},
            {"input", "multiple"},          // HTML5
            {"input", "readonly"},
            {"input", "required"},          // HTML5
            {"input", "autofocus"},         // HTML5
            {"input", "formnovalidate"},    // HTML5
            {"iframe", "seamless"},         // HTML5
            {"keygen", "autofocus"},        // HTML5
            {"keygen", "disabled"},         // HTML5
            {"menu", "compact"},
            {"object", "declare"},
            {"object", "typemustmatch"},    // HTML5
            {"ol", "compact"},
            {"ol", "reversed"},             // HTML5
            {"optgroup", "disabled"},
            {"option", "selected"},
            {"option", "disabled"},
            {"script", "defer"},
            {"script", "async"},            // HTML5
            {"select", "multiple"},
            {"select", "disabled"},
            {"select", "autofocus"},        // HTML5
            {"select", "required"},         // HTML5
            {"style", "scoped"},            // HTML5
            {"td", "nowrap"},
            {"textarea", "disabled"},
            {"textarea", "readonly"},
            {"textarea", "autofocus"},      // HTML5
            {"textarea", "required"},       // HTML5
            {"th", "nowrap"},
            {"track", "default"},           // HTML5
            {"ul", "compact"},
            {"video", "autoplay"},          // HTML5
            {"video", "controls"},          // HTML5
            {"video", "loop"},              // HTML5
            {"video", "muted"}              // HTML5
        };
        for (String[] pair : pairs) {
            setBooleanAttribute(pair[0], pair[1]);
        }
    }

    /**
     * Record that an attribute is a boolean attribute of a given element.
     * @param element the element name, or "*" for any element
     * @param attribute the attribute name
     */
    private static void setBooleanAttribute(String element, String attribute) {
        final String combination = element + '+' + attribute;
        booleanAttributes.add(attribute);
        booleanCombinations.add(combination);
    }

    /**
     * Decide whether an attribute can be written in minimised (name-only) form.
     * @param element the name of the element carrying the attribute
     * @param attribute the attribute name
     * @param value the attribute value
     * @return true if the value equals the attribute name (ignoring case) and the attribute
     * is registered as boolean either for this element or for all elements ("*")
     */
    private static boolean isBooleanAttribute(String element, String attribute, String value) {
        if (!attribute.equalsIgnoreCase(value)) {
            return false;
        }
        // cheap check on the bare name before building any concatenated key
        if (!booleanAttributes.contains(attribute)) {
            return false;
        }
        if (booleanCombinations.contains(element + '+' + attribute)) {
            return true;
        }
        return booleanCombinations.contains("*+" + attribute);
    }

    /**
     * Create an HTML emitter. Nothing is initialised here beyond the field defaults.
     */

    public HTMLEmitter() {
        super();
    }

    /**
     * Say that all non-ASCII characters should be escaped, regardless of the character encoding.
     * @param escape true if all non-ASCII characters should be escaped; a null value is treated
     * as false rather than failing on unboxing
     */

    public void setEscapeNonAscii(Boolean escape) {
        // Boolean.TRUE.equals() avoids the NullPointerException that auto-unboxing null would throw
        escapeNonAscii = Boolean.TRUE.equals(escape);
    }

    /**
     * Decide whether an element is "serialized as an HTML element" in the language of the 3.0 specification.
     * The decision is left to the concrete subclass.
     * @param name the name of the element
     * @return true if the element is to be serialized as an HTML element
     */

    protected abstract boolean isHTMLElement(NodeName name);

    /**
     * Start of output. This implementation does nothing; the work of starting
     * the document is done in openDocument().
     */

    public void open() throws XPathException {
        // intentionally empty
    }

    /**
     * Write the start of the document: make sure a writer exists and, the first time only,
     * write a byte order mark if one was requested for UTF-8 output.
     */
    protected void openDocument() throws XPathException {
        if (writer == null) {
            makeWriter();
        }

        // This method is sometimes called twice, especially during an identity transform.
        // The "started" flag ensures the document start is only written once.
        if (!started) {
            started = true;

            final String bomRequested = outputProperties.getProperty(SaxonOutputKeys.BYTE_ORDER_MARK);
            if ("yes".equals(bomRequested)) {
                final String encoding = outputProperties.getProperty(OutputKeys.ENCODING);
                if ("UTF-8".equalsIgnoreCase(encoding)) {
                    try {
                        writer.write('\uFEFF');
                    } catch (java.io.IOException ignored) {
                        // Might be an encoding exception; just ignore it
                    }
                }
            }

            // park the script/style counter at its "not in use" value
            inScript = -1000000;
        }
    }

    /**
     * Output the document type declaration. This level adds nothing of its own and
     * hands the request straight to the superclass.
     * @param name        The name of the element
     * @param displayName The element name as displayed
     * @param systemId    The DOCTYPE system identifier
     * @param publicId    The DOCTYPE public identifier
     */

    protected void writeDocType(NodeName name, String displayName, String systemId, String publicId)
            throws XPathException {
        super.writeDocType(name, displayName, systemId, publicId);
    }

    /**
     * Output an element start tag, and keep track of whether the content that follows
     * belongs to a script or style element.
     */

    public void startElement(NodeName elemName, SchemaType typeCode, int locationId, int properties) throws XPathException {
        super.startElement(elemName, typeCode, locationId, properties);

        uri = elemName.getURI();
        parentElement = elementStack.peek();

        final boolean scriptOrStyle = elemName.isInNamespace("")
                && (parentElement.equalsIgnoreCase("script") || parentElement.equalsIgnoreCase("style"));

        // A no-namespace script/style element restarts the counter at 1;
        // any other element simply goes one level deeper.
        inScript = scriptOrStyle ? 1 : inScript + 1;

        nodeNameStack.push(elemName);
    }

    // Closes the pending start tag as soon as this method is called.
    public void startContent() throws XPathException {
        // NOTE(review): the original trailing comment here was garbled ("prevent  syntax");
        // presumably it read "prevent <xxx/> syntax", i.e. the XML empty-element form
        // must not appear in HTML output - confirm against the upstream source.
        closeStartTag();
    }

    /**
     * Write an attribute. For an element in no namespace, an attribute that qualifies as
     * a boolean attribute (see isBooleanAttribute) is written in minimised form, as the
     * name alone; everything else is written by the superclass as a name=value pair.
     */

    protected void writeAttribute(NodeName elCode, String attname, CharSequence value, int properties) throws XPathException {
        try {
            final boolean minimise = uri.length() == 0
                    && isBooleanAttribute(parentElement, attname, value.toString());
            if (minimise) {
                writer.write(attname);
            } else {
                super.writeAttribute(elCode, attname, value, properties);
            }
        } catch (java.io.IOException err) {
            throw new XPathException(err);
        }
    }


    /**
    * Escape characters. Overrides the XML behaviour
    */

    protected void writeEscape(final CharSequence chars, final boolean inAttribute)
    throws java.io.IOException, XPathException {

        int segstart = 0;
        final boolean[] specialChars = (inAttribute ? specialInAtt : specialInText);

        if (chars instanceof CompressedWhitespace) {
            ((CompressedWhitespace)chars).writeEscape(specialChars, writer);
            return;
        }
        boolean disabled = false;

        while (segstart < chars.length()) {
            int i = segstart;

            // find a maximal sequence of "ordinary" characters

            if (escapeNonAscii) {
                char c;
                while (i < chars.length() && (c = chars.charAt(i)) < 127 && !specialChars[c]) {
                    i++;
                }
            } else {
                char c;
                while (i < chars.length() &&
                        ((c = chars.charAt(i)) < 127 ? !specialChars[c] : (characterSet.inCharset(c) && c > 160)
     				 )
     			  ) {
                    i++;
                }
            }

            // if this was the whole string, output the string and quit

            if (i == chars.length()) {
                if (segstart == 0) {
                    writeCharSequence(chars);
                } else {
                    writeCharSequence(chars.subSequence(segstart, i));
                }
                return;
            }

            // otherwise, output this sequence and continue
            if (i > segstart) {
                writeCharSequence(chars.subSequence(segstart, i));
            }

            final char c = chars.charAt(i);
            if (c==0) {
                // used to switch escaping on and off
                disabled = !disabled;
            } else if (disabled) {
                writer.write(c);
            } else if (c<=127) {
                // handle a special ASCII character
                if (inAttribute) {
                    if (c=='<') {
                        writer.write('<');      // not escaped
                    } else if (c=='>') {
                        writer.write(">");   // recommended for older browsers
                    } else if (c=='&') {
                        if (i+1') {
                        writer.write(">");  // changed to allow for "]]>"
                    } else if (c=='&') {
                        writer.write("&");
                    } else if (c=='\r') {
                        writer.write("
");
                    }
                }

            } else if (c < 160) {
                // these control characters are illegal in HTML
                XPathException err = new XPathException("Illegal HTML character: decimal " + (int)c);
                err.setErrorCode("SERE0014");
                throw err;

            } else if (c==160) {
        		// always output NBSP as an entity reference
            	writer.write(" ");

            } else if (c>=55296 && c<=56319) {  //handle surrogate pair

                //A surrogate pair is two consecutive Unicode characters.  The first
                //is in the range D800 to DBFF, the second is in the range DC00 to DFFF.
                //To compute the numeric value of the character corresponding to a surrogate
                //pair, use this formula (all numbers are hex):
        	    //(FirstChar - D800) * 400 + (SecondChar - DC00) + 10000

                    // we'll trust the data to be sound
                int charval = (((int)c - 55296) * 1024) + ((int)chars.charAt(i+1) - 56320) + 65536;
                characterReferenceGenerator.outputCharacterReference(charval, writer);
                i++;

            } else if (escapeNonAscii || !characterSet.inCharset(c)) {
                characterReferenceGenerator.outputCharacterReference(c, writer);
            } else {
                writer.write(c);
            }
            segstart = ++i;
        }

    }

    /**
     * Output an element end tag. The end tag is omitted when the element name is in the
     * table of empty tags and the element is being serialized as an HTML element.
     */

    public void endElement() throws XPathException {
        // The explicit cast keeps this assignment valid even where the stack is
        // declared without a type parameter, in which case pop() returns Object.
        final NodeName nodeName = (NodeName) nodeNameStack.pop();
        final String name = elementStack.peek();

        // leave one level of script/style tracking; on reaching zero the script or
        // style element itself has ended, so park the counter at its "not in use" value
        inScript--;
        if (inScript==0) {
            inScript = -1000000;
        }

        if (isEmptyTag(name) && isHTMLElement(nodeName)) {
            // no end tag required
            elementStack.pop();
        } else {
            super.endElement();
        }

    }

    /**
     * Character data. While the script/style counter is positive the text is passed on
     * with the DISABLE_ESCAPING option added, so that it is written without escaping.
     */

    public void characters (CharSequence chars, int locationId, int properties)
    throws XPathException {
        final int options = (inScript > 0)
                ? (properties | ReceiverOptions.DISABLE_ESCAPING)
                : properties;
        super.characters(chars, locationId, options);
    }

    /**
    * Handle a processing instruction.
    */

    public void processingInstruction (String target, CharSequence data, int locationId, int properties)
        throws XPathException
    {
        if (!started) {
            openDocument();
        }
        for (int i=0; i') {
                XPathException err = new XPathException("A processing instruction in HTML must not contain a > character");
                err.setErrorCode("SERE0015");
                throw err;
            }
        }
        try {
            writer.write("');
        } catch (java.io.IOException err) {
            throw new XPathException(err);
        }
    }





}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy