All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xwiki.rendering.renderer.printer.XHTMLWikiPrinter Maven / Gradle / Ivy

There is a newer version: 17.0.0-rc-1
Show newest version
/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.xwiki.rendering.renderer.printer;

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.AttributesImpl;
import org.xwiki.xml.html.HTMLElementSanitizer;

/**
 * Base toolkit class for all XHTML-based renderers. This printer handles whitespaces so that it prints
 * {@code &nbsp;} when needed (i.e. when the spaces are at the beginning or at the end of an element's content or
 * when there are more than 1 contiguous spaces), except for CDATA sections and inside PRE elements. It also knows
 * how to handle XHTML comments.
 *
 * @version $Id: 695820996f18e2d0d53ea360266dfd2569ca26ea $
 * @since 1.7M1
 */
public class XHTMLWikiPrinter extends XMLWikiPrinter
{
    /**
     * Prefix that is used for invalid/disallowed attributes: an attribute rejected by the sanitizer is printed under
     * this prefix followed by its name stripped of invalid characters, provided the sanitizer accepts the resulting
     * name; otherwise the attribute is dropped.
     *
     * @since 14.6RC1
     */
    public static final String TRANSLATED_ATTRIBUTE_PREFIX = "data-xwiki-translated-attribute-";

    /**
     * Pattern for matching characters not allowed in data attributes.
     * 

* This is the inverse of the definition of a name being * Name production without ":". */ private static final Pattern DATA_REPLACEMENT_PATTERN = Pattern.compile("[^A-Z_a-z\\u00C0-\\u00D6\\u00D8-\\u00F6" + "\\u00F8-\\u02ff\\u0370-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff" + "\\uf900-\\ufdcf\\ufdf0-\\ufffd\\x{10000}-\\x{EFFFF}\\-.0-9\\u00b7\\u0300-\\u036f\\u203f-\\u2040]"); /** * The sanitizer used to restrict allowed elements and attributes, can be null (no restrictions). * * @since 14.6RC1 */ protected final HTMLElementSanitizer htmlElementSanitizer; private int spaceCount; private boolean isInCData; private boolean isInPreserveElement; private boolean elementEnded; private boolean hasTextBeenPrinted; private boolean isStandalone; /** * @param printer the object to which to write the XHTML output to */ public XHTMLWikiPrinter(WikiPrinter printer) { this(printer, null); } /** * @param printer the object to which to write the XHTML output to * @param htmlElementSanitizer the sanitizer to use for sanitizing elements and attributes */ public XHTMLWikiPrinter(WikiPrinter printer, HTMLElementSanitizer htmlElementSanitizer) { super(printer); this.htmlElementSanitizer = htmlElementSanitizer; } /** * Use it to specify that the current element to print is standalone. This value might be used to know if the first * space should be printed with a simple space or a {@code  } entity. Note that the standalone value is * automatically reset after first printing of a space, or when a text is printed. 
* * @since 12.2 */ public void setStandalone() { this.isStandalone = true; } @Override public void printXML(String str) { handleSpaceWhenInText(); super.printXML(str); this.hasTextBeenPrinted = true; this.isStandalone = false; } @Override public void printXMLElement(String name) { if (this.htmlElementSanitizer == null || this.htmlElementSanitizer.isElementAllowed(name)) { handleSpaceWhenStartElement(); super.printXMLElement(name); } } @Override public void printXMLElement(String name, String[][] attributes) { if (this.htmlElementSanitizer == null || this.htmlElementSanitizer.isElementAllowed(name)) { handleSpaceWhenStartElement(); super.printXMLElement(name, cleanAttributes(name, attributes)); } } @Override public void printXMLElement(String name, Map attributes) { if (this.htmlElementSanitizer == null || this.htmlElementSanitizer.isElementAllowed(name)) { handleSpaceWhenStartElement(); super.printXMLElement(name, cleanAttributes(name, attributes)); } } @Override public void printXMLStartElement(String name) { if (this.htmlElementSanitizer == null || this.htmlElementSanitizer.isElementAllowed(name)) { handleSpaceWhenStartElement(); super.printXMLStartElement(name); } } @Override public void printXMLStartElement(String name, String[][] attributes) { if (this.htmlElementSanitizer == null || this.htmlElementSanitizer.isElementAllowed(name)) { handleSpaceWhenStartElement(); super.printXMLStartElement(name, cleanAttributes(name, attributes)); } } @Override public void printXMLStartElement(String name, Map attributes) { if (this.htmlElementSanitizer == null || this.htmlElementSanitizer.isElementAllowed(name)) { handleSpaceWhenStartElement(); super.printXMLStartElement(name, cleanAttributes(name, attributes)); } } @Override public void printXMLStartElement(String name, Attributes attributes) { if (this.htmlElementSanitizer == null || this.htmlElementSanitizer.isElementAllowed(name)) { handleSpaceWhenStartElement(); super.printXMLStartElement(name, cleanAttributes(name, 
attributes)); } } @Override public void printXMLEndElement(String name) { if (this.htmlElementSanitizer == null || this.htmlElementSanitizer.isElementAllowed(name)) { handleSpaceWhenEndlement(); super.printXMLEndElement(name); this.elementEnded = true; } } @Override public void printXMLComment(String content) { printXMLComment(content, false); } @Override public void printXMLComment(String content, boolean escape) { handleSpaceWhenStartElement(); super.printXMLComment(content, escape); this.elementEnded = true; } @Override public void printXMLStartCData() { handleSpaceWhenStartElement(); super.printXMLStartCData(); } @Override public void printXMLEndCData() { handleSpaceWhenEndlement(); super.printXMLEndCData(); } /** * This method should be used to print a space rather than calling printXML(" "). */ public void printSpace() { this.spaceCount++; } @Override public void printRaw(String raw) { handleSpaceWhenStartElement(); // Prevent injecting {{/html}}. We escape {{/html}} as well as prefixes of {{/html}} at the end of the raw // content to avoid that raw content and plain texts can be combined to construct the full {{/html}}. This may // cause errors as we might not be using the right escaping for the context (e.g., JSON or HTML comments) but // for this reason we also escape in JSON output and HTML comments. 
String escapedRaw = raw.replace("{{/html}}", "{{/html}}"); StringBuilder prefix = new StringBuilder(); for (Character nextChar : List.of('{', '/', 'h', 't', 'm', 'l', '}', '}')) { prefix.append(nextChar); if (escapedRaw.endsWith(prefix.toString())) { escapedRaw = escapedRaw.substring(0, escapedRaw.length() - prefix.length()) + "{" + prefix.substring(1); break; } } super.printRaw(escapedRaw); this.elementEnded = true; } private void handleSpaceWhenInText() { if (this.elementEnded || this.hasTextBeenPrinted) { handleSpaceWhenStartElement(); } else { handleSpaceWhenEndlement(); } } private Map cleanAttributes(String elementName, Map attributes) { Map cleanAttributes; if (this.htmlElementSanitizer == null || attributes == null) { cleanAttributes = attributes; } else { cleanAttributes = new LinkedHashMap<>(); for (Map.Entry e : attributes.entrySet()) { if (this.htmlElementSanitizer.isAttributeAllowed(elementName, e.getKey(), e.getValue())) { cleanAttributes.put(e.getKey(), e.getValue()); } else { // Keep but clean invalid attributes with a prefix (removed during parsing) to avoid loosing them // through WYSIWYG editing. String translatedName = TRANSLATED_ATTRIBUTE_PREFIX + removeInvalidDataAttributeCharacters(e.getKey()); if (this.htmlElementSanitizer.isAttributeAllowed(elementName, translatedName, e.getValue())) { cleanAttributes.put(translatedName, e.getValue()); } } } } return cleanAttributes; } private String[][] cleanAttributes(String elementName, String[][] attributes) { String[][] allowedAttributes; if (this.htmlElementSanitizer == null || attributes == null) { allowedAttributes = attributes; } else { allowedAttributes = Arrays.stream(attributes) .map(entry -> { if (this.htmlElementSanitizer.isAttributeAllowed(elementName, entry[0], entry[1])) { return entry; } else { // Keep but clean invalid attributes with a prefix (removed during parsing) to avoid loosing // them through WYSIWYG editing. 
String translatedName = TRANSLATED_ATTRIBUTE_PREFIX + removeInvalidDataAttributeCharacters(entry[0]); if (this.htmlElementSanitizer.isAttributeAllowed(elementName, translatedName, entry[1])) { return new String[] { translatedName, entry[1] }; } else { return null; } } }) .filter(Objects::nonNull) .toArray(String[][]::new); } return allowedAttributes; } private Attributes cleanAttributes(String elementName, Attributes attributes) { Attributes allowedAttribute; if (this.htmlElementSanitizer == null || attributes == null) { allowedAttribute = attributes; } else { allowedAttribute = new AttributesImpl(); for (int i = 0; i < attributes.getLength(); ++i) { if (this.htmlElementSanitizer.isAttributeAllowed(elementName, attributes.getQName(i), attributes.getValue(i))) { ((AttributesImpl) allowedAttribute).addAttribute(null, null, attributes.getQName(i), null, attributes.getValue(i)); } else { // Keep but clean invalid attributes with a prefix (removed during parsing) to avoid loosing them // through WYSIWYG editing. String translatedName = TRANSLATED_ATTRIBUTE_PREFIX + removeInvalidDataAttributeCharacters(attributes.getQName(i)); if (this.htmlElementSanitizer.isAttributeAllowed(elementName, translatedName, attributes.getValue(i))) { ((AttributesImpl) allowedAttribute).addAttribute(null, null, translatedName, null, attributes.getValue(i)); } } } } return allowedAttribute; } /** * Strips out invalid characters from names used for data attributes. * * @param name the data attribute name to clean * @return valid name, to be prefixed with data- */ public static String removeInvalidDataAttributeCharacters(String name) { return DATA_REPLACEMENT_PATTERN.matcher(name).replaceAll(""); } private void handleSpaceWhenStartElement() { // Use case: something ... // Use case: something