All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.owasp.html.HtmlStreamRenderer Maven / Gradle / Ivy

There is a newer version: 20240325.1
Show newest version
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package org.owasp.html;

import com.google.common.annotations.VisibleForTesting;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import javax.annotation.WillCloseWhenClosed;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * Given a series of HTML tokens, writes valid, normalized HTML to the output.
 * The output will have well-defined tag boundaries, but there may be orphaned
 * or missing close and open tags.
 * The result of two renderers can always be concatenated to produce a larger
 * snippet of HTML, but if the first was called with
 * {@code writeOpenTag("plaintext", ...)}, then any tags in the second will not
 * be interpreted as tags in the concatenated version.
 */
@TCB
@NotThreadSafe
public class HtmlStreamRenderer implements HtmlStreamEventReceiver {

  private final Appendable output;
  private final Handler ioExHandler;
  private final Handler badHtmlHandler;
  private String lastTagOpened;
  private StringBuilder pendingUnescaped;
  private boolean open;

  /**
   * Factory.
   * @param output the buffer to which HTML is streamed.
   * @param ioExHandler called with any exception raised by output.
   * @param badHtmlHandler receives alerts when HTML cannot be rendered because
   *    there is not valid HTML tree that results from that series of calls.
   *    E.g. it is not possible to create an HTML {@code "}.
   */
  public static HtmlStreamRenderer create(
      @WillCloseWhenClosed Appendable output,
      Handler ioExHandler,
      Handler badHtmlHandler) {
    if (output instanceof Closeable) {
      return new CloseableHtmlStreamRenderer(
          output, ioExHandler, badHtmlHandler);
    } else {
      return new HtmlStreamRenderer(output, ioExHandler, badHtmlHandler);
    }
  }

  /**
   * Factory.
   * @param output the buffer to which HTML is streamed.
   * @param badHtmlHandler receives alerts when HTML cannot be rendered because
   *    there is not valid HTML tree that results from that series of calls.
   *    E.g. it is not possible to create an HTML {@code "}.
   */
  public static HtmlStreamRenderer create(
      StringBuilder output, Handler badHtmlHandler) {
    // Propagate since StringBuilder should not throw IOExceptions.
    return create(output, Handler.PROPAGATE, badHtmlHandler);
  }

  private HtmlStreamRenderer(
      Appendable output, Handler ioExHandler,
      Handler badHtmlHandler) {
    this.output = output;
    this.ioExHandler = ioExHandler;
    this.badHtmlHandler = badHtmlHandler;
  }

  /**
   * Called when the series of calls make no sense.
   * May be overridden to throw an unchecked throwable, to log, or to take some
   * other action.
   *
   * @param message for human consumption.
   * @param identifier an HTML identifier associated with the message.
   */
  private final void error(String message, CharSequence identifier) {
    if (badHtmlHandler != Handler.DO_NOTHING) {   // Avoid string append.
      badHtmlHandler.handle(message + " : " + identifier);
    }
  }

  public final void openDocument() throws IllegalStateException {
    if (open) { throw new IllegalStateException(); }
    open = true;
  }

  public final void closeDocument() throws IllegalStateException {
    if (!open) { throw new IllegalStateException(); }
    if (pendingUnescaped != null) {
      closeTag(lastTagOpened);
    }
    open = false;
    if (output instanceof Flushable) {
      try {
        ((Flushable) output).flush();
      } catch (IOException ex) {
        ioExHandler.handle(ex);
      }
    }
  }

  public final boolean isDocumentOpen() {
    return open;
  }

  public final void openTag(String elementName, List attrs) {
    try {
      writeOpenTag(elementName, attrs);
    } catch (IOException ex) {
      ioExHandler.handle(ex);
    }
  }

  private void writeOpenTag(String elementName, List attrs)
      throws IOException {
    if (!open) { throw new IllegalStateException(); }
    elementName = safeName(elementName);
    if (!isValidHtmlName(elementName)) {
      error("Invalid element name", elementName);
      return;
    }
    if (pendingUnescaped != null) {
      error("Tag content cannot appear inside CDATA element", elementName);
      return;
    }

    switch (HtmlTextEscapingMode.getModeForTag(elementName)) {
      case CDATA_SOMETIMES:
      case CDATA:
      case PLAIN_TEXT:
        lastTagOpened = elementName;
        pendingUnescaped = new StringBuilder();
        break;
      default:
    }

    output.append('<').append(elementName);

    for (Iterator attrIt = attrs.iterator();
         attrIt.hasNext();) {
      String name = attrIt.next();
      String value = attrIt.next();
      name = HtmlLexer.canonicalName(name);
      if (!isValidHtmlName(name)) {
        error("Invalid attr name", name);
        continue;
      }
      output.append(' ').append(name).append('=').append('"');
      Encoding.encodeHtmlOnto(value, output);
      if (value.indexOf('`') != -1) {
        // Apparently, in quirks mode, IE8 does a poor job producing innerHTML
        // values.  Given
        //     
// we encode ` but if JavaScript does: // nodeA.innerHTML = nodeB.innerHTML; // and nodeB contains the DIV above, then IE8 will produce //
// as the value of nodeB.innerHTML and assign it to nodeA. // IE8's HTML parser treats `` as a blank attribute value and foo=bar // becomes a separate attribute. // Adding a space at the end of the attribute prevents this by forcing // IE8 to put double quotes around the attribute when computing // nodeB.innerHTML. output.append(' '); } output.append('"'); } // Limit our output to the intersection of valid XML and valid HTML5 when // the output contains no special HTML5 elements like , <script>, or // <textarea>. if (HtmlTextEscapingMode.isVoidElement(elementName)) { output.append(" /"); } output.append('>'); } public final void closeTag(String elementName) { try { writeCloseTag(safeName(elementName)); } catch (IOException ex) { ioExHandler.handle(ex); } } private final void writeCloseTag(String elementName) throws IOException { if (!open) { throw new IllegalStateException(); } elementName = HtmlLexer.canonicalName(elementName); if (!isValidHtmlName(elementName)) { error("Invalid element name", elementName); return; } if (pendingUnescaped != null) { if (!lastTagOpened.equals(elementName)) { error("Tag content cannot appear inside CDATA element", elementName); return; } else { StringBuilder cdataContent = pendingUnescaped; pendingUnescaped = null; Encoding.stripBannedCodeunits(cdataContent); int problemIndex = checkHtmlCdataCloseable(lastTagOpened, cdataContent); if (problemIndex == -1) { output.append(cdataContent); } else { error( "Invalid CDATA text content", cdataContent.subSequence( problemIndex, Math.min(problemIndex + 10, cdataContent.length()))); // Still output the close tag. } } if ("plaintext".equals(elementName)) { return; } } output.append("</").append(elementName).append(">"); } public final void text(String text) { try { writeText(text); } catch (IOException ex) { ioExHandler.handle(ex); } } private final void writeText(String text) throws IOException { if (!open) { throw new IllegalStateException(); } if (pendingUnescaped != null) { pendingUnescaped.append(text); } else { Encoding.encodeHtmlOnto(text, output); // Works for RCDATA. } } private static int checkHtmlCdataCloseable( String localName, StringBuilder sb) { int escapingTextSpanStart = -1; for (int i = 0, n = sb.length(); i < n; ++i) { char ch = sb.charAt(i); switch (ch) { case '<': if (i + 3 < n && '!' == sb.charAt(i + 1) && '-' == sb.charAt(i + 2) && '-' == sb.charAt(i + 3)) { if (escapingTextSpanStart == -1) { escapingTextSpanStart = i; } else { return i; } } else if (i + 1 + localName.length() < n && '/' == sb.charAt(i + 1) && Strings.regionMatchesIgnoreCase( sb, i + 2, localName, 0, localName.length())) { // A close tag contained in the content. if (escapingTextSpanStart < 0) { // We could try some recovery strategies here. // E.g. prepending "/<!--\n" to sb if "script".equals(localName) return i; } if (!"script".equals(localName)) { // Script tags are commonly included inside script tags. // <script><!--document.write('<script>f()</script>');--></script> // but this does not happen in other CDATA element types. // Actually allowing an end tag inside others is problematic. // Specifically, // <style><!--</style>-->/* foo */</style> // displays the text "/* foo */" on some browsers. return i; } } break; case '>': // From the HTML5 spec: // The text in style, script, title, and textarea elements must not // have an escaping text span start that is not followed by an // escaping text span end. // We look left since the HTML 5 spec allows the escaping text span // end to share dashes with the start. if (i >= 2 && '-' == sb.charAt(i - 1) && '-' == sb.charAt(i - 2)) { if (escapingTextSpanStart < 0) { return i - 2; } escapingTextSpanStart = -1; } break; default: break; } } if (escapingTextSpanStart >= 0) { // We could try recovery strategies here. // E.g. appending "//-->" to the buffer if "script".equals(localName) return escapingTextSpanStart; } return -1; } @VisibleForTesting static boolean isValidHtmlName(String name) { int n = name.length(); if (n == 0) { return false; } if (n > 128) { return false; } boolean isNamespaced = false; for (int i = 0; i < n; ++i) { char ch = name.charAt(i); switch (ch) { case ':': if (isNamespaced) { return false; } isNamespaced = true; if (i == 0 || i + 1 == n) { return false; } break; case '-': if (i == 0 || i + 1 == n) { return false; } break; default: if (ch <= '9') { if (i == 0 || ch < '0') { return false; } } else if ('A' <= ch && ch <= 'z') { if ('Z' < ch && ch < 'a') { return false; } } else { return false; } break; } } return true; } /** * Canonicalizes the element name and possibly substitutes an alternative * that has more consistent semantics. */ static String safeName(String elementName) { elementName = HtmlLexer.canonicalName(elementName); // Substitute a reliably non-raw-text element for raw-text and // plain-text elements. switch (elementName.length()) { case 3: if ("xmp".equals(elementName)) { return "pre"; } break; case 7: if ("listing".equals(elementName)) { return "pre"; } break; case 9: if ("plaintext".equals(elementName)) { return "pre"; } break; } return elementName; } static class CloseableHtmlStreamRenderer extends HtmlStreamRenderer implements Closeable { private final Closeable closeable; CloseableHtmlStreamRenderer( @WillCloseWhenClosed Appendable output, Handler<? super IOException> errorHandler, Handler<? super String> badHtmlHandler) { super(output, errorHandler, badHtmlHandler); this.closeable = (Closeable) output; } public void close() throws IOException { if (isDocumentOpen()) { closeDocument(); } closeable.close(); } } } </code></pre> <br/> <br/> <div id="right-banner"> </div> <div id="left-banner"> </div> <div class='clear'></div> <aside class="related-items"> <section> <div class="panel panel-primary"> <div class="panel-heading margin-bottom">Related Artifacts</div> <div class=""> <a title='This artifact is from the group mysql' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/mysql/mysql-connector-java' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> mysql-connector-java <small class='group-info' >mysql</small></a><br/><a title='This artifact is from the group com.github.codedrinker' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.github.codedrinker/facebook-messenger' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> facebook-messenger <small class='group-info' >com.github.codedrinker</small></a><br/><a title='This artifact is from the group org.seleniumhq.selenium' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.seleniumhq.selenium/selenium-java' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> selenium-java <small class='group-info' >org.seleniumhq.selenium</small></a><br/><a title='This artifact is from the group com.github.sola92' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.github.sola92/instagram-java' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> instagram-java <small class='group-info' >com.github.sola92</small></a><br/><a title='This artifact is from the group com.google.code.gson' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.code.gson/gson' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> gson <small class='group-info' >com.google.code.gson</small></a><br/><a title='This artifact is from the group org.apache.poi' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.poi/poi' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> poi <small class='group-info' >org.apache.poi</small></a><br/><a title='This artifact is from the group org.apache.httpcomponents' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.httpcomponents/httpclient' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> httpclient <small class='group-info' >org.apache.httpcomponents</small></a><br/><a title='This artifact is from the group org.json' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.json/json' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> json <small class='group-info' >org.json</small></a><br/><a title='This artifact is from the group com.google.code.facebook-java-api' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.code.facebook-java-api/facebook-java-api' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> facebook-java-api <small class='group-info' >com.google.code.facebook-java-api</small></a><br/><a title='This artifact is from the group org.apache.poi' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.poi/poi-ooxml' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> poi-ooxml <small class='group-info' >org.apache.poi</small></a><br/><a title='This artifact is from the group com.fasterxml.jackson.core' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.fasterxml.jackson.core/jackson-databind' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> jackson-databind <small class='group-info' >com.fasterxml.jackson.core</small></a><br/><a title='This artifact is from the group junit' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/junit/junit' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> junit <small class='group-info' >junit</small></a><br/><a title='This artifact is from the group org.primefaces' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.primefaces/primefaces' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> primefaces <small class='group-info' >org.primefaces</small></a><br/><a title='This artifact is from the group com.github.noraui' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.github.noraui/ojdbc7' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> ojdbc7 <small class='group-info' >com.github.noraui</small></a><br/><a title='This artifact is from the group com.jfoenix' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.jfoenix/jfoenix' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> jfoenix <small class='group-info' >com.jfoenix</small></a><br/><a title='This artifact is from the group org.testng' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.testng/testng' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> testng <small class='group-info' >org.testng</small></a><br/><a title='This artifact is from the group com.googlecode.json-simple' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.googlecode.json-simple/json-simple' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> json-simple <small class='group-info' >com.googlecode.json-simple</small></a><br/><a title='This artifact is from the group org.seleniumhq.selenium' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.seleniumhq.selenium/selenium-server' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> selenium-server <small class='group-info' >org.seleniumhq.selenium</small></a><br/><a title='This artifact is from the group com.itextpdf' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.itextpdf/itextpdf' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> itextpdf <small class='group-info' >com.itextpdf</small></a><br/><a title='This artifact is from the group org.springframework' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.springframework/spring-core' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> spring-core <small class='group-info' >org.springframework</small></a><br/> </div> </div> </section> <section> <div class="panel panel-primary"> <div class="panel-heading margin-bottom">Related Groups</div> <div class=""> <a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.springframework' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.springframework</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.poi' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.apache.poi</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.hibernate' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.hibernate</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.springframework.boot' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.springframework.boot</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.fasterxml.jackson.core' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.fasterxml.jackson.core</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.itextpdf' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.itextpdf</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.seleniumhq.selenium' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.seleniumhq.selenium</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/mysql' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> mysql</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.finos.legend.engine' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.finos.legend.engine</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.httpcomponents' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.apache.httpcomponents</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.logging.log4j' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.apache.logging.log4j</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.openjfx' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.openjfx</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.commons' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.apache.commons</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.json' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.json</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.guava' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.google.guava</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.zxing' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.google.zxing</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/net.sf.jasperreports' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> net.sf.jasperreports</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/javax.xml.bind' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> javax.xml.bind</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/ojdbc' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> ojdbc</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.code.facebook-java-api' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.google.code.facebook-java-api</a><br/> </div> </div> </section> </aside> <div class='clear'></div> </main> </div> <br/><br/> <div class="align-center">© 2015 - 2024 <a href="/legal-notice.php">Weber Informatics LLC</a> | <a href="/data-protection.php">Privacy Policy</a></div> <br/><br/><br/><br/><br/><br/> </body> </html>