All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.common.escape.Escapers Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
/*
 * Copyright (C) 2009 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.escape;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;

import java.util.HashMap;
import java.util.Map;

import javax.annotation.Nullable;

/**
 * Static utility methods pertaining to {@link Escaper} instances.
 *
 * @author Sven Mawson
 * @author David Beaumont
 * @since 15.0
 */
@Beta
@GwtCompatible
public final class Escapers {
  private Escapers() {}

  /**
   * Returns an {@link Escaper} that does no escaping, passing all character
   * data through unchanged.
   */
  public static Escaper nullEscaper() {
    return NULL_ESCAPER;
  }

  // An Escaper that efficiently performs no escaping.
  // Extending CharEscaper (instead of Escaper) makes Escapers.compose() easier.
  private static final Escaper NULL_ESCAPER = new CharEscaper() {
    @Override public String escape(String string) {
      return checkNotNull(string);
    }

    @Override protected char[] escape(char c) {
      // TODO: Fix tests not to call this directly and make it throw an error.
      return null;
    }
  };

  /**
   * Returns a builder for creating simple, fast escapers. A builder instance
   * can be reused and each escaper that is created will be a snapshot of the
   * current builder state. Builders are not thread safe.
   *
   * 

The initial state of the builder is such that: *

    *
  • There are no replacement mappings
  • *
  • {@code safeMin == Character.MIN_VALUE}
  • *
  • {@code safeMax == Character.MAX_VALUE}
  • *
  • {@code unsafeReplacement == null}
  • *
*

For performance reasons escapers created by this builder are not * Unicode aware and will not validate the well-formedness of their input. */ public static Builder builder() { return new Builder(); } /** * A builder for simple, fast escapers. * *

Typically an escaper needs to deal with the escaping of high valued * characters or code points. In these cases it is necessary to extend either * {@link ArrayBasedCharEscaper} or {@link ArrayBasedUnicodeEscaper} to * provide the desired behavior. However this builder is suitable for creating * escapers that replace a relative small set of characters. * * @author David Beaumont * @since 15.0 */ @Beta public static final class Builder { private final Map replacementMap = new HashMap(); private char safeMin = Character.MIN_VALUE; private char safeMax = Character.MAX_VALUE; private String unsafeReplacement = null; // The constructor is exposed via the builder() method above. private Builder() {} /** * Sets the safe range of characters for the escaper. Characters in this * range that have no explicit replacement are considered 'safe' and remain * unescaped in the output. If {@code safeMax < safeMin} then the safe range * is empty. * * @param safeMin the lowest 'safe' character * @param safeMax the highest 'safe' character * @return the builder instance */ public Builder setSafeRange(char safeMin, char safeMax) { this.safeMin = safeMin; this.safeMax = safeMax; return this; } /** * Sets the replacement string for any characters outside the 'safe' range * that have no explicit replacement. If {@code unsafeReplacement} is * {@code null} then no replacement will occur, if it is {@code ""} then * the unsafe characters are removed from the output. * * @param unsafeReplacement the string to replace unsafe chracters * @return the builder instance */ public Builder setUnsafeReplacement(@Nullable String unsafeReplacement) { this.unsafeReplacement = unsafeReplacement; return this; } /** * Adds a replacement string for the given input character. The specified * character will be replaced by the given string whenever it occurs in the * input, irrespective of whether it lies inside or outside the 'safe' * range. * * @param c the character to be replaced * @param replacement the string to replace the given character * @return the builder instance * @throws NullPointerException if {@code replacement} is null */ public Builder addEscape(char c, String replacement) { checkNotNull(replacement); // This can replace an existing character (the builder is re-usable). replacementMap.put(c, replacement); return this; } /** * Returns a new escaper based on the current state of the builder. */ public Escaper build() { return new ArrayBasedCharEscaper(replacementMap, safeMin, safeMax) { private final char[] replacementChars = unsafeReplacement != null ? unsafeReplacement.toCharArray() : null; @Override protected char[] escapeUnsafe(char c) { return replacementChars; } }; } } /** * Returns a {@link UnicodeEscaper} equivalent to the given escaper instance. * If the escaper is already a UnicodeEscaper then it is simply returned, * otherwise it is wrapped in a UnicodeEscaper. * *

When a {@link CharEscaper} escaper is wrapped by this method it acquires * extra behavior with respect to the well-formedness of Unicode character * sequences and will throw {@link IllegalArgumentException} when given bad * input. * * @param escaper the instance to be wrapped * @return a UnicodeEscaper with the same behavior as the given instance * @throws NullPointerException if escaper is null * @throws IllegalArgumentException if escaper is not a UnicodeEscaper or a * CharEscaper */ static UnicodeEscaper asUnicodeEscaper(Escaper escaper) { checkNotNull(escaper); if (escaper instanceof UnicodeEscaper) { return (UnicodeEscaper) escaper; } else if (escaper instanceof CharEscaper) { return wrap((CharEscaper) escaper); } // In practice this shouldn't happen because it would be very odd not to // extend either CharEscaper or UnicodeEscaper for non trivial cases. throw new IllegalArgumentException("Cannot create a UnicodeEscaper from: " + escaper.getClass().getName()); } /** * Returns a string that would replace the given character in the specified * escaper, or {@code null} if no replacement should be made. This method is * intended for use in tests through the {@code EscaperAsserts} class; * production users of {@link CharEscaper} should limit themselves to its * public interface. * * @param c the character to escape if necessary * @return the replacement string, or {@code null} if no escaping was needed */ public static String computeReplacement(CharEscaper escaper, char c) { return stringOrNull(escaper.escape(c)); } /** * Returns a string that would replace the given character in the specified * escaper, or {@code null} if no replacement should be made. This method is * intended for use in tests through the {@code EscaperAsserts} class; * production users of {@link UnicodeEscaper} should limit themselves to its * public interface. * * @param cp the Unicode code point to escape if necessary * @return the replacement string, or {@code null} if no escaping was needed */ public static String computeReplacement(UnicodeEscaper escaper, int cp) { return stringOrNull(escaper.escape(cp)); } private static String stringOrNull(char[] in) { return (in == null) ? null : new String(in); } /** Private helper to wrap a CharEscaper as a UnicodeEscaper. */ private static UnicodeEscaper wrap(final CharEscaper escaper) { return new UnicodeEscaper() { @Override protected char[] escape(int cp) { // If a code point maps to a single character, just escape that. if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) { return escaper.escape((char) cp); } // Convert the code point to a surrogate pair and escape them both. // Note: This code path is horribly slow and typically allocates 4 new // char[] each time it is invoked. However this avoids any // synchronization issues and makes the escaper thread safe. char[] surrogateChars = new char[2]; Character.toChars(cp, surrogateChars, 0); char[] hiChars = escaper.escape(surrogateChars[0]); char[] loChars = escaper.escape(surrogateChars[1]); // If either hiChars or lowChars are non-null, the CharEscaper is trying // to escape the characters of a surrogate pair separately. This is // uncommon and applies only to escapers that assume UCS-2 rather than // UTF-16. See: http://en.wikipedia.org/wiki/UTF-16/UCS-2 if (hiChars == null && loChars == null) { // We expect this to be the common code path for most escapers. return null; } // Combine the characters and/or escaped sequences into a single array. int hiCount = hiChars != null ? hiChars.length : 1; int loCount = loChars != null ? loChars.length : 1; char[] output = new char[hiCount + loCount]; if (hiChars != null) { // TODO: Is this faster than System.arraycopy() for small arrays? for (int n = 0; n < hiChars.length; ++n) { output[n] = hiChars[n]; } } else { output[0] = surrogateChars[0]; } if (loChars != null) { for (int n = 0; n < loChars.length; ++n) { output[hiCount + n] = loChars[n]; } } else { output[hiCount] = surrogateChars[1]; } return output; } }; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy