All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.power.common.util.StringEscapeUtil Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.power.common.util;

import org.apache.commons.lang3.StringUtils;

import java.io.IOException;
import java.io.Writer;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

/**
 * Escapes and unescapes {@code String}s using Java string-literal rules.
 * Modeled on the Apache commons-text {@code StringEscapeUtils}.
 *
 * @author yu 2020/4/24.
 */
public class StringEscapeUtil {

    /**
     * A Map&lt;CharSequence, CharSequence&gt; to escape the Java
     * control characters, the double quote and the backslash.
     * <p>
     * Namely: {@code \b \n \t \f \r \" \\}
     */
    public static final Map<CharSequence, CharSequence> JAVA_CTRL_CHARS_ESCAPE;

    /** Per-character view of {@link #JAVA_CTRL_CHARS_ESCAPE}: raw character to its escape sequence. */
    private static final Map<Character, String> ESCAPE_LOOKUP = new HashMap<>();

    /** Upper-case hexadecimal digits used when emitting backslash-u escapes. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    static {
        final Map<CharSequence, CharSequence> initialMap = new HashMap<>();
        initialMap.put("\b", "\\b");
        initialMap.put("\n", "\\n");
        initialMap.put("\t", "\\t");
        initialMap.put("\f", "\\f");
        initialMap.put("\r", "\\r");
        initialMap.put("\"", "\\\"");
        initialMap.put("\\", "\\\\");
        JAVA_CTRL_CHARS_ESCAPE = Collections.unmodifiableMap(initialMap);
        for (final Map.Entry<CharSequence, CharSequence> pair : JAVA_CTRL_CHARS_ESCAPE.entrySet()) {
            // Every key is exactly one character long, so it can be indexed by that character.
            ESCAPE_LOOKUP.put(pair.getKey().charAt(0), pair.getValue().toString());
        }
    }

    /**
     * Escapes the characters in a {@code String} using Java String rules.
     * Equivalent to {@code escapeJava(content, Boolean.FALSE)}.
     *
     * @param content String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static String escapeJava(String content) {
        return escapeJava(content, Boolean.FALSE);
    }

    /**
     * Escapes the characters in a {@code String} using Java String rules, but
     * leaves characters in the range 0x4E00-0x9FBB (CJK ideographs) untouched.
     * Equivalent to {@code escapeJava(content, Boolean.TRUE)}.
     *
     * @param content String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static String escapeJavaIgnoreChinese(String content) {
        return escapeJava(content, Boolean.TRUE);
    }

    /**
     * Escapes the characters in a {@code String} using Java String rules.
     * <ul>
     *   <li>Characters listed in {@link #JAVA_CTRL_CHARS_ESCAPE} are replaced by their
     *       two-character escape sequence.</li>
     *   <li>When {@code ignoreChinese} is true, characters in 0x4E00-0x9FBB are copied unchanged.</li>
     *   <li>Characters in the range 0x20-0xF7 are copied unchanged.</li>
     *   <li>Every other character is written as a backslash, a {@code u} and four upper-case
     *       hexadecimal digits.</li>
     * </ul>
     *
     * @param content       String to escape values in, may be null
     * @param ignoreChinese whether CJK ideographs are copied unchanged; {@code null} is treated as false
     * @return String with escaped values, {@code null} if null string input
     */
    public static String escapeJava(String content, Boolean ignoreChinese) {
        if (Objects.isNull(content)) {
            return null;
        }
        // Null-safe unboxing: a null flag previously caused a NullPointerException mid-string.
        final boolean keepChinese = Boolean.TRUE.equals(ignoreChinese);
        final StringBuilder out = new StringBuilder(content.length() + 16);
        for (int i = 0; i < content.length(); i++) {
            final char ch = content.charAt(i);
            final String replacement = ESCAPE_LOOKUP.get(ch);
            if (replacement != null) {
                out.append(replacement);
            } else if (keepChinese && ch >= 0x4e00 && ch <= 0x9fbb) {
                out.append(ch);
            } else if (ch >= 32 && ch <= 0xf7) {
                // NOTE(review): upper bound 0xF7 is kept from the original; possibly meant 0x7F.
                out.append(ch);
            } else {
                out.append("\\u")
                        .append(HEX_DIGITS[(ch >>> 12) & 0xf])
                        .append(HEX_DIGITS[(ch >>> 8) & 0xf])
                        .append(HEX_DIGITS[(ch >>> 4) & 0xf])
                        .append(HEX_DIGITS[ch & 0xf]);
            }
        }
        return out.toString();
    }

    /**
     * A Map&lt;CharSequence, CharSequence&gt; from Java escape sequences to the
     * characters they denote. The lone-backslash entry maps to the empty string,
     * i.e. a backslash that does not start a known escape is dropped.
     */
    public static final Map<CharSequence, CharSequence> JAVA_CTRL_CHARS_UNESCAPE;

    /**
     * Two-character escapes of {@link #JAVA_CTRL_CHARS_UNESCAPE}, keyed by the character
     * that follows the backslash.
     */
    private static final Map<Character, String> UNESCAPE_LOOKUP = new HashMap<>();

    static {
        final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>();
        unescapeJavaMap.put("\\\\", "\\");
        unescapeJavaMap.put("\\\"", "\"");
        unescapeJavaMap.put("\\'", "'");
        unescapeJavaMap.put("\\", "");
        unescapeJavaMap.put("\\b", "\b");
        unescapeJavaMap.put("\\n", "\n");
        unescapeJavaMap.put("\\t", "\t");
        unescapeJavaMap.put("\\f", "\f");
        unescapeJavaMap.put("\\r", "\r");
        JAVA_CTRL_CHARS_UNESCAPE = Collections.unmodifiableMap(unescapeJavaMap);
        for (final Map.Entry<CharSequence, CharSequence> pair : JAVA_CTRL_CHARS_UNESCAPE.entrySet()) {
            final CharSequence key = pair.getKey();
            // All keys start with a backslash, so index on the SECOND character; indexing on
            // the first one would collapse every entry into a single slot.
            if (key.length() == 2) {
                UNESCAPE_LOOKUP.put(key.charAt(1), pair.getValue().toString());
            }
        }
    }

    /**
     * Returns a new map whose keys are the values of {@code map} and whose values are its keys.
     *
     * @param map the map to invert, must not be null and must not contain duplicate values
     * @return the inverted map
     */
    public static Map<CharSequence, CharSequence> invert(final Map<CharSequence, CharSequence> map) {
        return map.entrySet().stream().collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
    }

    private static boolean isOctalDigit(char ch) {
        return ch >= '0' && ch <= '7';
    }

    private static boolean isZeroToThree(char ch) {
        return ch >= '0' && ch <= '3';
    }

    /**
     * Unescapes any Java literals found in the {@code String}.
     * For example, it will turn a sequence of {@code '\'} and
     * {@code 'n'} into a newline character, unless the {@code '\'}
     * is preceded by another {@code '\'}.
     * <p>
     * Octal escapes, backslash-u escapes and the sequences in
     * {@link #JAVA_CTRL_CHARS_UNESCAPE} are recognised, in that order. A backslash
     * that starts none of them is dropped.
     *
     * @param content the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     * @throws IllegalArgumentException if a backslash-u escape is truncated or not hexadecimal
     */
    public static String unescapeJava(String content) {
        if (Objects.isNull(content)) {
            return null;
        }
        final int len = content.length();
        // Unescaping never produces more characters than it consumes.
        final StringBuilder out = new StringBuilder(len);
        int index = 0;
        while (index < len) {
            final char ch = content.charAt(index);
            if (ch != '\\') {
                out.append(ch);
                index++;
                continue;
            }
            if (index + 1 < len) {
                final char next = content.charAt(index + 1);
                if (isOctalDigit(next)) {
                    index += octalUnescape(content, index, out);
                    continue;
                }
                if (next == 'u') {
                    index += unicodeUnescape(content, index, out);
                    continue;
                }
                final String replacement = UNESCAPE_LOOKUP.get(next);
                if (replacement != null) {
                    out.append(replacement);
                    index += 2;
                    continue;
                }
            }
            // Lone backslash: mapped to the empty string, so just skip it.
            index++;
        }
        return out.toString();
    }

    /**
     * Translates an escaped Unicode value (backslash, one or more {@code u}, an optional
     * {@code +}, then four hexadecimal digits) starting at {@code index} back to the
     * character it denotes and appends it to {@code tmp}.
     *
     * @param input the text being unescaped
     * @param index position of the backslash
     * @param tmp   receives the decoded character
     * @return number of characters consumed from {@code input}
     * @throws IllegalArgumentException if fewer than four characters remain or they are not hexadecimal
     * @since 1.0
     */
    private static int unicodeUnescape(String input, int index, StringBuilder tmp) {
        // consume optional additional 'u' chars
        int i = 2;
        while (index + i < input.length() && input.charAt(index + i) == 'u') {
            i++;
        }
        if (index + i < input.length() && input.charAt(index + i) == '+') {
            i++;
        }
        if (index + i + 4 <= input.length()) {
            // Get 4 hex digits
            final CharSequence unicode = input.subSequence(index + i, index + i + 4);
            try {
                final int value = Integer.parseInt(unicode.toString(), 16);
                tmp.append((char) value);
            } catch (final NumberFormatException nfe) {
                throw new IllegalArgumentException("Unable to parse unicode value: " + unicode, nfe);
            }
            return i + 4;
        }
        throw new IllegalArgumentException("Less than 4 hex digits in unicode value: '"
                + input.subSequence(index, input.length())
                + "' due to end of CharSequence");
    }

    /**
     * Translates an escaped octal value starting at {@code index} back to the character
     * it denotes and appends it to {@code tmp}.
     * <p>
     * For example, "\45" should go back to being the specific value (a %).
     * <p>
     * Note that this currently only supports the viable range of octal for Java; namely
     * 1 to 377. This is because parsing Java is the main use case.
     *
     * @param content the text being unescaped; the character after {@code index} must be an octal digit
     * @param index   position of the backslash
     * @param tmp     receives the decoded character
     * @return number of characters consumed from {@code content}
     * @since 1.0
     */
    private static int octalUnescape(String content, int index, StringBuilder tmp) {
        final int remaining = content.length() - index - 1;
        final StringBuilder digits = new StringBuilder(3);
        final char first = content.charAt(index + 1);
        digits.append(first);
        if (remaining > 1 && isOctalDigit(content.charAt(index + 2))) {
            digits.append(content.charAt(index + 2));
            // A third digit is only legal when the first is 0-3 (max value 377 octal).
            if (remaining > 2 && isZeroToThree(first) && isOctalDigit(content.charAt(index + 3))) {
                digits.append(content.charAt(index + 3));
            }
        }
        // Cast to char: StringBuilder.append(int) would append the decimal digits instead.
        tmp.append((char) Integer.parseInt(digits.toString(), 8));
        return 1 + digits.length();
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy