org.weakref.jmx.internal.guava.xml.XmlEscapers Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jmxutils Show documentation
Exporting JMX mbeans made easy
There is a newer version: 1.26
/*
 * Copyright (C) 2009 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.weakref.jmx.internal.guava.xml;

import org.weakref.jmx.internal.guava.annotations.Beta;
import org.weakref.jmx.internal.guava.annotations.GwtCompatible;
import org.weakref.jmx.internal.guava.escape.Escaper;
import org.weakref.jmx.internal.guava.escape.Escapers;

/**
 * {@code Escaper} instances suitable for strings to be included in XML
 * attribute values and elements' text contents. When possible, avoid manual
 * escaping by using templating systems and high-level APIs that provide
 * autoescaping. For example, consider XOM or
 * JDOM.
 *
 * Note: Currently the escapers provided by this class do not escape
 * any characters outside the ASCII character range. Unlike HTML escaping the
 * XML escapers will not escape non-ASCII characters to their numeric entity
 * replacements. These XML escapers provide the minimal level of escaping to
 * ensure that the output can be safely included in a Unicode XML document.
 *
 *
 * 
For details on the behavior of the escapers in this class, see sections
 * 2.2 and
 * 2.4 of the
 * XML specification.
 *
 * @author Alex Matevossian
 * @author David Beaumont
 * @since 15.0
 */
@Beta
@GwtCompatible
public class XmlEscapers {
  private XmlEscapers() {}

  private static final char MIN_ASCII_CONTROL_CHAR = 0x00;
  private static final char MAX_ASCII_CONTROL_CHAR = 0x1F;

  // For each xxxEscaper() method, please add links to external reference pages
  // that are considered authoritative for the behavior of that escaper.

  // TODO(user): When this escaper strips \uFFFE & \uFFFF, add this doc.
  // 
This escaper also silently removes non-whitespace control characters and
  // the character values {@code 0xFFFE} and {@code 0xFFFF} which are not
  // permitted in XML. For more detail see section
  // 2.2 of
  // the XML specification.

  /**
   * Returns an {@link Escaper} instance that escapes special characters in a
   * string so it can safely be included in an XML document as element content.
   * See section
   * 2.4 of the
   * XML specification.
   *
   * 
Note: Double and single quotes are not escaped, so it is not
   * safe to use this escaper to escape attribute values. Use
   * {@link #xmlContentEscaper} if the output can appear in element content or
   * {@link #xmlAttributeEscaper} in attribute values.
   *
   * 
This escaper does not escape non-ASCII characters to their numeric
   * character references (NCR). Any non-ASCII characters appearing in the input
   * will be preserved in the output. Specifically "\r" (carriage return) is
   * preserved in the output, which may result in it being silently converted to
   * "\n" when the XML is parsed.
   *
   * 
This escaper does not treat surrogate pairs specially and does not
   * perform Unicode validation on its input.
   */
  public static Escaper xmlContentEscaper() {
    return XML_CONTENT_ESCAPER;
  }

  /**
   * Returns an {@link Escaper} instance that escapes special characters in a
   * string so it can safely be included in XML document as an attribute value.
   * See section
   * 3.3.3
   * of the XML specification.
   *
   * 
This escaper does not escape non-ASCII characters to their numeric
   * character references (NCR). However, horizontal tab {@code '\t'}, line feed
   * {@code '\n'} and carriage return {@code '\r'} are escaped to a
   * corresponding NCR {@code "	"}, {@code "
"}, and {@code ""}
   * respectively. Any other non-ASCII characters appearing in the input will
   * be preserved in the output.
   *
   * This escaper does not treat surrogate pairs specially and does not
   * perform Unicode validation on its input.
   */
  public static Escaper xmlAttributeEscaper() {
    return XML_ATTRIBUTE_ESCAPER;
  }

  private static final Escaper XML_ESCAPER;
  private static final Escaper XML_CONTENT_ESCAPER;
  private static final Escaper XML_ATTRIBUTE_ESCAPER;
  static {
    Escapers.Builder builder = Escapers.builder();
    // The char values \uFFFE and \uFFFF are explicitly not allowed in XML
    // (Unicode code points above \uFFFF are represented via surrogate pairs
    // which means they are treated as pairs of safe characters).
    // TODO(user): When refactoring done change the \uFFFF below to \uFFFD
    builder.setSafeRange(Character.MIN_VALUE, '\uFFFF');
    // Unsafe characters are removed.
    builder.setUnsafeReplacement("");

    // Except for '\n', '\t' and '\r' we remove all ASCII control characters.
    // An alternative to this would be to make a map that simply replaces the
    // allowed ASCII whitespace characters with themselves and set the minimum
    // safe character to 0x20. However this would slow down the escaping of
    // simple strings that contain '\t','\n' or '\r'.
    for (char c = MIN_ASCII_CONTROL_CHAR; c <= MAX_ASCII_CONTROL_CHAR; c++) {
      if (c != '\t' && c != '\n' && c != '\r') {
        builder.addEscape(c, "");
      }
    }

    // Build the content escaper first and then add quote escaping for the
    // general escaper.
    builder.addEscape('&', "&");
    builder.addEscape('<', "<");
    builder.addEscape('>', ">");
    XML_CONTENT_ESCAPER = builder.build();
    builder.addEscape('\'', "'");
    builder.addEscape('"', """);
    XML_ESCAPER = builder.build();
    builder.addEscape('\t', "	");
    builder.addEscape('\n', "
");
    builder.addEscape('\r', "");
    XML_ATTRIBUTE_ESCAPER = builder.build();
  }
}