All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.gwtproject.safehtml.shared.SafeHtmlUtils Maven / Gradle / Ivy

/*
 * Copyright © 2019 The GWT Project Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gwtproject.safehtml.shared;

import elemental2.core.JsRegExp;
import elemental2.core.JsString;
import org.gwtproject.safehtml.shared.annotations.GwtIncompatible;
import org.gwtproject.safehtml.shared.annotations.IsSafeHtml;
import org.gwtproject.safehtml.shared.annotations.SuppressIsSafeHtmlCastCheck;

/** Utility class containing static methods for escaping and sanitizing strings. */
public final class SafeHtmlUtils {

  /** A {@link SafeHtml} constructed from the empty string. */
  public static final SafeHtml EMPTY_SAFE_HTML = new SafeHtmlString("");

  /**
   * Regular-expression character class matching the five characters the escaping implementations
   * in this file replace: ampersand, less-than, greater-than, single quote and double quote.
   */
  public static final String HTML_CHARS = "[&<>'\"]";

  /**
   * Regular expression for the text between the {@code '&'} and the {@code ';'} of an entity
   * reference: a lowercase name, a decimal numeric reference ({@code #123}) or a hexadecimal
   * numeric reference ({@code #x1F}). Used by {@code htmlEscapeAllowEntities} to decide whether an
   * ampersand already starts an entity.
   */
  private static final String HTML_ENTITY_REGEX = "[a-z]+|#[0-9]+|#x[0-9a-fA-F]+";

  // Escaping implementation instance.
  // NOTE(review): presumably the delegate behind htmlEscape(String); the call site is not visible
  // in this excerpt -- confirm against the full file.
  private static final JvmImpl impl = new JvmImpl();

  // prevent instantiation
  private SafeHtmlUtils() {}

  /**
   * Returns a {@link SafeHtml} constructed from a safe string, i.e., without escaping the string.
   *
   * <p><b>Important</b>: For this method to be able to honor the {@link SafeHtml} contract, all
   * uses of this method must satisfy the following constraints:
   *
   * <ol>
   *   <li>The argument expression must be fully determined at compile time.
   *   <li>The value of the argument must end in "inner HTML" context and not contain incomplete
   *       HTML tags. I.e., the following is not a correct use of this method, because the {@code
   *       <a>} tag is incomplete:
       * {@code shb.appendHtmlConstant("':
            return ">";
          case '"':
            return """;
          case '\'':
            return "'";
          default:
            return "" + c;
        }
      }
    
      /**
       * HTML-escapes a string, but does not double-escape HTML-entities already present in the string.
       *
       * <p>The input is split on {@code '&'}. Each piece that follows an ampersand is inspected: if it
       * begins with an entity body (as defined by {@code HTML_ENTITY_REGEX}) terminated by {@code ';'},
       * the ampersand and the entity are copied through unchanged; otherwise the ampersand itself is
       * emitted as {@code &amp;}. All remaining text is passed through {@code htmlEscape(String)}.
       * For example, {@code a & b &lt; c} becomes {@code a &amp; b &lt; c}.
       *
       * @param text the string to be escaped
       * @return the input string, with all occurrences of HTML meta-characters replaced with their
       *     corresponding HTML Entity References, with the exception that ampersand characters are not
       *     double-escaped if they form the start of an HTML Entity Reference
       */
      @IsSafeHtml
      @SuppressIsSafeHtmlCastCheck
      public static String htmlEscapeAllowEntities(String text) {
        // A limit of -1 keeps trailing empty segments, so a trailing '&' in the input is not lost.
        String[] segments = text.split("&", -1);

        StringBuilder escaped = new StringBuilder();

        /*
         * The first segment is never part of an entity reference, so we always
         * escape it.
         * Note that if the input starts with an ampersand, we will get an empty
         * segment before that.
         */
        escaped.append(htmlEscape(segments[0]));

        // Every later segment was preceded by an ampersand in the input.
        for (int i = 1; i < segments.length; i++) {
          String segment = segments[i];
          int entityEnd = segment.indexOf(';');
          // String.matches is kept (rather than a precompiled Pattern) so this shared code uses
          // only java.lang.String APIs.
          boolean startsWithEntity =
              entityEnd > 0 && segment.substring(0, entityEnd).matches(HTML_ENTITY_REGEX);

          if (startsWithEntity) {
            // Append the entity without escaping: the ampersand stays a literal '&'.
            escaped.append("&").append(segment.substring(0, entityEnd + 1));

            // Append the rest of the segment, escaped.
            escaped.append(htmlEscape(segment.substring(entityEnd + 1)));
          } else {
            // The segment did not start with an entity reference, so the ampersand is a bare
            // meta-character and must itself be escaped, along with the whole segment.
            escaped.append("&amp;").append(htmlEscape(segment));
          }
        }

        return escaped.toString();
      }
    
      static class JsImpl {
    
        private static final JsRegExp HTML_CHARS_RE = new JsRegExp(HTML_CHARS);
        private static final JsRegExp AMP_RE = new JsRegExp("&", "g");
        private static final JsRegExp GT_RE = new JsRegExp(">", "g");
        private static final JsRegExp LT_RE = new JsRegExp("<", "g");
        private static final JsRegExp SQUOT_RE = new JsRegExp("\'", "g");
        private static final JsRegExp QUOT_RE = new JsRegExp("\"", "g");
    
        String htmlEscape(String s) {
          if (!HTML_CHARS_RE.test(s)) {
            return s;
          }
          if (s.indexOf("&") != -1) {
            s = new JsString(s).replace(AMP_RE, "&");
          }
          if (s.indexOf("<") != -1) {
            s = new JsString(s).replace(LT_RE, "<");
          }
          if (s.indexOf(">") != -1) {
            s = new JsString(s).replace(GT_RE, ">");
          }
          if (s.indexOf("\"") != -1) {
            s = new JsString(s).replace(QUOT_RE, """);
          }
          if (s.indexOf("'") != -1) {
            s = new JsString(s).replace(SQUOT_RE, "'");
          }
          return s;
        }
      }
    
      static class JvmImpl extends JsImpl {
    
        @GwtIncompatible
        @Override
        String htmlEscape(String s) {
          if (!s.matches("[\\s\\S]*" + HTML_CHARS + "[\\s\\S]*")) {
            return s;
          }
          if (s.indexOf("&") != -1) {
            s = s.replaceAll("&", "&");
          }
          if (s.indexOf("<") != -1) {
            s = s.replaceAll("<", "<");
          }
          if (s.indexOf(">") != -1) {
            s = s.replaceAll(">", ">");
          }
          if (s.indexOf("\"") != -1) {
            s = s.replaceAll("\"", """);
          }
          if (s.indexOf("'") != -1) {
            s = s.replaceAll("'", "'");
          }
          return s;
        }
      }
    }
    




© 2015 - 2025 Weber Informatics LLC | Privacy Policy