org.xwiki.rendering.renderer.printer.XHTMLWikiPrinter Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of xwiki-rendering-xml Show documentation
XML related tools for the rendering
There is a newer version: 17.0.0-rc-1
/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.xwiki.rendering.renderer.printer;

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.AttributesImpl;
import org.xwiki.xml.html.HTMLElementSanitizer;

/**
 * Base toolkit class for all XHTML-based renderers. This printer handles whitespace so that it prints
 * {@code &nbsp;} when needed (i.e. when the spaces are at the beginning or at the end of an element's content or when
 * there is more than one contiguous space), except in CDATA sections and inside PRE elements. It also knows how to
 * handle XHTML comments.
 *
 * @version $Id: 695820996f18e2d0d53ea360266dfd2569ca26ea $
 * @since 1.7M1
 */
public class XHTMLWikiPrinter extends XMLWikiPrinter
{
    /**
     * Prefix that is used for invalid/disallowed attributes.
     *
     * @since 14.6RC1
     */
    public static final String TRANSLATED_ATTRIBUTE_PREFIX = "data-xwiki-translated-attribute-";

    /**
     * Pattern for matching characters not allowed in data attributes.
     * 
     * This is the negation of the XML {@code NameChar} production with ":" excluded, i.e. it matches every character
     * that may not appear in an XML-compatible name.
     */
    private static final Pattern DATA_REPLACEMENT_PATTERN = Pattern.compile("[^A-Z_a-z\\u00C0-\\u00D6\\u00D8-\\u00F6"
        + "\\u00F8-\\u02ff\\u0370-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff"
        + "\\uf900-\\ufdcf\\ufdf0-\\ufffd\\x{10000}-\\x{EFFFF}\\-.0-9\\u00b7\\u0300-\\u036f\\u203f-\\u2040]");

    /**
     * The sanitizer used to restrict allowed elements and attributes, can be null (no restrictions).
     *
     * @since 14.6RC1
     */
    protected final HTMLElementSanitizer htmlElementSanitizer;

    // Number of spaces requested through printSpace() that have not been handled yet. Presumably consumed by the
    // handleSpaceWhen*() helpers, whose bodies are outside this excerpt - confirm there.
    private int spaceCount;

    // NOTE(review): never assigned in the visible part of the class; check the handleSpaceWhen*() helpers for its use.
    private boolean isInCData;

    // NOTE(review): never assigned in the visible part of the class; check the handleSpaceWhen*() helpers for its use.
    private boolean isInPreserveElement;

    // Set to true after an end element, a comment or raw content has been printed; read by handleSpaceWhenInText().
    private boolean elementEnded;

    // Set to true once printXML() has printed some text; read by handleSpaceWhenInText().
    private boolean hasTextBeenPrinted;

    // Set to true by setStandalone() and cleared again by printXML().
    private boolean isStandalone;

    /**
     * Creates a printer without any sanitizer: it delegates to the two-argument constructor with a {@code null}
     * sanitizer, which means elements and attributes are not restricted.
     *
     * @param printer the object to which to write the XHTML output to
     */
    public XHTMLWikiPrinter(WikiPrinter printer)
    {
        this(printer, null);
    }

    /**
     * Creates a printer that consults the given sanitizer before printing elements and attributes.
     *
     * @param printer the object to which to write the XHTML output to
     * @param htmlElementSanitizer the sanitizer to use for sanitizing elements and attributes, or {@code null} to
     *     print everything without restrictions
     */
    public XHTMLWikiPrinter(WikiPrinter printer, HTMLElementSanitizer htmlElementSanitizer)
    {
        super(printer);
        // Kept as-is (possibly null); every sanitizing method checks for null before using it.
        this.htmlElementSanitizer = htmlElementSanitizer;
    }

    /**
     * Use it to specify that the current element to print is standalone. This value might be used to know if the first
     * space should be printed with a simple space or a {@code &nbsp;} entity. Note that the standalone value is
     * automatically reset after first printing of a space, or when a text is printed.
     *
     * @since 12.2
     */
    public void setStandalone()
    {
        this.isStandalone = true;
    }

    /**
     * Prints text content, first letting {@code handleSpaceWhenInText()} deal with the spaces that are still pending.
     *
     * @param str the text to print
     */
    @Override
    public void printXML(String str)
    {
        handleSpaceWhenInText();
        super.printXML(str);

        // Printing text ends the standalone state and records that some text has now been emitted.
        this.isStandalone = false;
        this.hasTextBeenPrinted = true;
    }

    /**
     * Prints an element without attributes, unless a sanitizer is configured and rejects the element name.
     *
     * @param name the name of the element to print
     */
    @Override
    public void printXMLElement(String name)
    {
        // The sanitizer is optional; when present, elements it rejects are silently dropped.
        if (this.htmlElementSanitizer != null && !this.htmlElementSanitizer.isElementAllowed(name)) {
            return;
        }

        handleSpaceWhenStartElement();
        super.printXMLElement(name);
    }

    /**
     * Prints an element with its attributes, unless a sanitizer is configured and rejects the element name. The
     * attributes are passed through {@code cleanAttributes} before being printed.
     *
     * @param name the name of the element to print
     * @param attributes the attributes as name/value pairs
     */
    @Override
    public void printXMLElement(String name, String[][] attributes)
    {
        // The sanitizer is optional; when present, elements it rejects are silently dropped.
        if (this.htmlElementSanitizer != null && !this.htmlElementSanitizer.isElementAllowed(name)) {
            return;
        }

        handleSpaceWhenStartElement();
        String[][] sanitizedAttributes = cleanAttributes(name, attributes);
        super.printXMLElement(name, sanitizedAttributes);
    }

    /**
     * Prints an element with its attributes, unless a sanitizer is configured and rejects the element name. The
     * attributes are passed through {@code cleanAttributes} before being printed.
     *
     * @param name the name of the element to print
     * @param attributes the attributes, keyed by attribute name
     */
    @Override
    public void printXMLElement(String name, Map attributes)
    {
        // The sanitizer is optional; when present, elements it rejects are silently dropped.
        if (this.htmlElementSanitizer != null && !this.htmlElementSanitizer.isElementAllowed(name)) {
            return;
        }

        handleSpaceWhenStartElement();
        Map sanitizedAttributes = cleanAttributes(name, attributes);
        super.printXMLElement(name, sanitizedAttributes);
    }

    /**
     * Prints the start tag of an element without attributes, unless a sanitizer is configured and rejects the element
     * name.
     *
     * @param name the name of the element to open
     */
    @Override
    public void printXMLStartElement(String name)
    {
        // The sanitizer is optional; when present, elements it rejects are silently dropped.
        if (this.htmlElementSanitizer != null && !this.htmlElementSanitizer.isElementAllowed(name)) {
            return;
        }

        handleSpaceWhenStartElement();
        super.printXMLStartElement(name);
    }

    /**
     * Prints the start tag of an element with its attributes, unless a sanitizer is configured and rejects the
     * element name. The attributes are passed through {@code cleanAttributes} before being printed.
     *
     * @param name the name of the element to open
     * @param attributes the attributes as name/value pairs
     */
    @Override
    public void printXMLStartElement(String name, String[][] attributes)
    {
        // The sanitizer is optional; when present, elements it rejects are silently dropped.
        if (this.htmlElementSanitizer != null && !this.htmlElementSanitizer.isElementAllowed(name)) {
            return;
        }

        handleSpaceWhenStartElement();
        String[][] sanitizedAttributes = cleanAttributes(name, attributes);
        super.printXMLStartElement(name, sanitizedAttributes);
    }

    /**
     * Prints the start tag of an element with its attributes, unless a sanitizer is configured and rejects the
     * element name. The attributes are passed through {@code cleanAttributes} before being printed.
     *
     * @param name the name of the element to open
     * @param attributes the attributes, keyed by attribute name
     */
    @Override
    public void printXMLStartElement(String name, Map attributes)
    {
        // The sanitizer is optional; when present, elements it rejects are silently dropped.
        if (this.htmlElementSanitizer != null && !this.htmlElementSanitizer.isElementAllowed(name)) {
            return;
        }

        handleSpaceWhenStartElement();
        Map sanitizedAttributes = cleanAttributes(name, attributes);
        super.printXMLStartElement(name, sanitizedAttributes);
    }

    /**
     * Prints the start tag of an element with its SAX attributes, unless a sanitizer is configured and rejects the
     * element name. The attributes are passed through {@code cleanAttributes} before being printed.
     *
     * @param name the name of the element to open
     * @param attributes the SAX attributes of the element
     */
    @Override
    public void printXMLStartElement(String name, Attributes attributes)
    {
        // The sanitizer is optional; when present, elements it rejects are silently dropped.
        if (this.htmlElementSanitizer != null && !this.htmlElementSanitizer.isElementAllowed(name)) {
            return;
        }

        handleSpaceWhenStartElement();
        Attributes sanitizedAttributes = cleanAttributes(name, attributes);
        super.printXMLStartElement(name, sanitizedAttributes);
    }

    /**
     * Prints the end tag of an element, unless a sanitizer is configured and rejects the element name (in which case
     * the matching start tag was not printed either).
     *
     * @param name the name of the element to close
     */
    @Override
    public void printXMLEndElement(String name)
    {
        // Mirror the start-element check so that dropped elements do not get a stray end tag.
        if (this.htmlElementSanitizer != null && !this.htmlElementSanitizer.isElementAllowed(name)) {
            return;
        }

        handleSpaceWhenEndlement();
        super.printXMLEndElement(name);
        this.elementEnded = true;
    }

    /**
     * Prints an XML comment without escaping; shorthand for {@code printXMLComment(content, false)}.
     *
     * @param content the content of the comment
     */
    @Override
    public void printXMLComment(String content)
    {
        printXMLComment(content, false);
    }

    /**
     * Prints an XML comment. Pending spaces are handed to {@code handleSpaceWhenStartElement()} first, the same way
     * as before a start element.
     *
     * @param content the content of the comment
     * @param escape passed unchanged to the parent implementation
     */
    @Override
    public void printXMLComment(String content, boolean escape)
    {
        handleSpaceWhenStartElement();
        super.printXMLComment(content, escape);
        // Flag read by handleSpaceWhenInText() to decide how spaces before following text are handled.
        this.elementEnded = true;
    }

    /**
     * Opens a CDATA section. Pending spaces are handed to {@code handleSpaceWhenStartElement()} first, the same way
     * as before a start element.
     */
    @Override
    public void printXMLStartCData()
    {
        handleSpaceWhenStartElement();
        super.printXMLStartCData();
    }

    /**
     * Closes a CDATA section. Pending spaces are handed to {@code handleSpaceWhenEndlement()} first, the same way as
     * before an end element.
     */
    @Override
    public void printXMLEndCData()
    {
        handleSpaceWhenEndlement();
        super.printXMLEndCData();
    }

    /**
     * This method should be used to print a space rather than calling printXML(" ").
     * <p>
     * Nothing is written by this method itself: it only increments the counter of pending spaces.
     */
    public void printSpace()
    {
        this.spaceCount++;
    }

    /**
     * Prints raw content after neutralizing anything that could close an enclosing {@code {{html}}} macro.
     * <p>
     * Every complete {@code {{/html}}} has its two opening braces replaced by the {@code &#123;} character reference.
     * In addition, when the content ends with a single opening brace, or with an opening brace followed by the start
     * of {@code /html}}}, that brace is replaced too, so that the raw content cannot be combined with text printed
     * afterwards to form the closing macro syntax.
     *
     * @param raw the raw content to print, must not be {@code null}
     */
    @Override
    public void printRaw(String raw)
    {
        handleSpaceWhenStartElement();
        // Prevent injecting {{/html}}. We escape {{/html}} as well as prefixes of {{/html}} at the end of the raw
        // content to avoid that raw content and plain texts can be combined to construct the full {{/html}}. This may
        // cause errors as we might not be using the right escaping for the context (e.g., JSON or HTML comments) but
        // for this reason we also escape in JSON output and HTML comments.
        String escapedBrace = "&#123;";
        String escapedRaw = raw.replace("{{/html}}", escapedBrace + escapedBrace + "/html}}");

        // Any non-empty prefix of "{{/html}}" ends with a prefix of "{/html}}", so checking the latter is enough.
        // At most one prefix length can match because '{' only occurs at the start of the marker.
        String closingMarker = "{/html}}";
        for (int length = 1; length <= closingMarker.length(); ++length) {
            String trailing = closingMarker.substring(0, length);

            if (escapedRaw.endsWith(trailing)) {
                // Replace only the brace; the rest of the matched suffix is kept verbatim.
                escapedRaw =
                    escapedRaw.substring(0, escapedRaw.length() - length) + escapedBrace + trailing.substring(1);
                break;
            }
        }
        super.printRaw(escapedRaw);
        this.elementEnded = true;
    }

    /**
     * Chooses how pending spaces are handled right before text is printed: the end-element handling is used as long
     * as neither an element has ended nor text has been printed, the start-element handling otherwise.
     */
    private void handleSpaceWhenInText()
    {
        boolean nothingPrintedBefore = !this.elementEnded && !this.hasTextBeenPrinted;
        if (nothingPrintedBefore) {
            handleSpaceWhenEndlement();
            return;
        }

        handleSpaceWhenStartElement();
    }

    /**
     * Filters a map of attributes through the configured sanitizer.
     * <p>
     * Allowed attributes are copied unchanged. A rejected attribute is renamed to
     * {@link #TRANSLATED_ATTRIBUTE_PREFIX} followed by its name stripped of characters that are invalid in data
     * attributes; it is kept under that name if the sanitizer accepts the renamed attribute and dropped otherwise.
     *
     * @param elementName the name of the element carrying the attributes
     * @param attributes the attributes keyed by name, may be {@code null}
     * @return the given map itself when there is no sanitizer or the map is {@code null}, otherwise a new map that
     *     preserves the iteration order of the input
     */
    private Map<String, String> cleanAttributes(String elementName, Map<String, String> attributes)
    {
        Map<String, String> cleanAttributes;

        if (this.htmlElementSanitizer == null || attributes == null) {
            cleanAttributes = attributes;
        } else {
            cleanAttributes = new LinkedHashMap<>();
            for (Map.Entry<String, String> e : attributes.entrySet()) {
                if (this.htmlElementSanitizer.isAttributeAllowed(elementName, e.getKey(), e.getValue())) {
                    cleanAttributes.put(e.getKey(), e.getValue());
                } else {
                    // Keep but clean invalid attributes with a prefix (removed during parsing) to avoid losing them
                    // through WYSIWYG editing.
                    String translatedName =
                        TRANSLATED_ATTRIBUTE_PREFIX + removeInvalidDataAttributeCharacters(e.getKey());
                    if (this.htmlElementSanitizer.isAttributeAllowed(elementName, translatedName, e.getValue())) {
                        cleanAttributes.put(translatedName, e.getValue());
                    }
                }
            }
        }

        return cleanAttributes;
    }

    /**
     * Filters an array of name/value attribute pairs through the configured sanitizer.
     *
     * @param elementName the name of the element carrying the attributes
     * @param attributes the attributes as {@code {name, value}} pairs, may be {@code null}
     * @return the given array itself when there is no sanitizer or the array is {@code null}, otherwise a new array
     *     holding the allowed and the translated attributes in their original order
     */
    private String[][] cleanAttributes(String elementName, String[][] attributes)
    {
        if (this.htmlElementSanitizer == null || attributes == null) {
            return attributes;
        }

        return Arrays.stream(attributes)
            .map(pair -> translateOrDropAttributePair(elementName, pair))
            .filter(Objects::nonNull)
            .toArray(String[][]::new);
    }

    /**
     * Decides what to print for a single name/value pair.
     *
     * @param elementName the name of the element carrying the attribute
     * @param pair the attribute as a {@code {name, value}} pair
     * @return the pair itself when allowed, a renamed pair when only the prefixed name is allowed, {@code null} when
     *     the attribute has to be dropped
     */
    private String[] translateOrDropAttributePair(String elementName, String[] pair)
    {
        if (this.htmlElementSanitizer.isAttributeAllowed(elementName, pair[0], pair[1])) {
            return pair;
        }

        // Keep but clean invalid attributes with a prefix (removed during parsing) to avoid losing them through
        // WYSIWYG editing.
        String translatedName = TRANSLATED_ATTRIBUTE_PREFIX + removeInvalidDataAttributeCharacters(pair[0]);
        if (this.htmlElementSanitizer.isAttributeAllowed(elementName, translatedName, pair[1])) {
            return new String[] { translatedName, pair[1] };
        }

        return null;
    }

    /**
     * Filters SAX attributes through the configured sanitizer.
     * <p>
     * Only the qualified name and the value of each attribute are carried over to the result.
     *
     * @param elementName the name of the element carrying the attributes
     * @param attributes the SAX attributes, may be {@code null}
     * @return the given attributes themselves when there is no sanitizer or they are {@code null}, otherwise a new
     *     {@link AttributesImpl} holding the allowed and the translated attributes in their original order
     */
    private Attributes cleanAttributes(String elementName, Attributes attributes)
    {
        if (this.htmlElementSanitizer == null || attributes == null) {
            return attributes;
        }

        // NOTE(review): URI, local name and type are added as null although the SAX Attributes contract describes
        // empty strings / "CDATA" for missing values - confirm that consumers only read the qualified name and value.
        AttributesImpl sanitized = new AttributesImpl();
        for (int index = 0; index < attributes.getLength(); index++) {
            String qualifiedName = attributes.getQName(index);
            String value = attributes.getValue(index);

            if (this.htmlElementSanitizer.isAttributeAllowed(elementName, qualifiedName, value)) {
                sanitized.addAttribute(null, null, qualifiedName, null, value);
                continue;
            }

            // Keep but clean invalid attributes with a prefix (removed during parsing) to avoid losing them through
            // WYSIWYG editing.
            String translatedName = TRANSLATED_ATTRIBUTE_PREFIX + removeInvalidDataAttributeCharacters(qualifiedName);
            if (this.htmlElementSanitizer.isAttributeAllowed(elementName, translatedName, value)) {
                sanitized.addAttribute(null, null, translatedName, null, value);
            }
        }

        return sanitized;
    }

    /**
     * Deletes from the given name every character that is not allowed in the name of a data attribute.
     *
     * @param name the data attribute name to clean
     * @return the name without the disallowed characters, ready to be prefixed with {@code data-}
     */
    public static String removeInvalidDataAttributeCharacters(String name)
    {
        final String cleanedName = DATA_REPLACEMENT_PATTERN.matcher(name).replaceAll("");
        return cleanedName;
    }

    private void handleSpaceWhenStartElement()
    {
        // Use case: something ...
        // Use case: something