All downloads are free. Search and download functionality uses the official Maven repository.

com.google.j2cl.common.StringUtils Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2022 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.j2cl.common;

import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Character.digit;
import static java.util.stream.Collectors.joining;

import java.util.Locale;

/** Utilities to produce Strings in code. */
public final class StringUtils {

  /** Return the String with first letter capitalized. */
  public static String capitalize(String string) {
    if (string.isEmpty()) {
      return string;
    }
    return string.substring(0, 1).toUpperCase() + string.substring(1);
  }

  /**
   * Unescapes properly escaped Wtf16 strings.
   *
   * 

Note: meant to be used as the inverse of escapeAsWtf16(). */ public static String unescapeWtf16(String string) { StringBuilder unescapedStringBuilder = new StringBuilder(); char[] charArray = string.toCharArray(); for (int i = 0; i < charArray.length; i++) { char c = charArray[i]; if (c == '\\') { // escape sequence. i++; switch (charArray[i]) { case 't': unescapedStringBuilder.append('\t'); continue; case 'n': // newline unescapedStringBuilder.append('\n'); continue; case 'r': unescapedStringBuilder.append('\r'); continue; case '"': unescapedStringBuilder.append('\"'); continue; case '\'': unescapedStringBuilder.append('\''); continue; case '\\': unescapedStringBuilder.append('\\'); continue; case 'u': unescapedStringBuilder.append(unescapeUnicode(charArray, i)); i += 4; continue; default: throw new InternalCompilerError("Bad escaping " + string); } } unescapedStringBuilder.append(c); } return unescapedStringBuilder.toString(); } private static char unescapeUnicode(char[] charArray, int i) { char value = 0; for (int j = 0; j < 4; j++) { int digit = digit(charArray[++i], 16); if (digit < 0) { throw new InternalCompilerError("Bad escaping " + new String(charArray)); } value = (char) (value * 16 + digit); } return value; } public static String escapeAsWtf16(String string) { // The chars in the CharSequence are already in WTF16. Hence iterate over the 16 bits chars // and decide how to encode in the string. return string.chars().mapToObj(StringUtils::escapeAsWtf16).collect(joining()); } public static String escapeAsWtf16(int c) { return escape(c, /* forUtf8= */ false); } /** Converts a potentially ill-formed UTF-16 string (WTF-16) into a UTF-8 string literal. 
*/ public static String escapeAsUtf8(String string) { StringBuilder escaped = new StringBuilder(); string .codePoints() .forEach( codepoint -> { if (codepoint < 0x80) { escaped.append(escapeAsUtf8(codepoint)); } else if (codepoint < 0x800) { escaped.append(escapeAsUtf8(0xC0 | (codepoint >> 6))); // upper bits escaped.append(escapeAsUtf8(0x80 | (codepoint & 0x3F))); // bits 0-5 } else if (codepoint < 0x10000) { escaped.append(escapeAsUtf8(0xE0 | (codepoint >> 12))); // upper bits escaped.append(escapeAsUtf8(0x80 | ((codepoint >> 6) & 0x3F))); // bits 6-11 escaped.append(escapeAsUtf8(0x80 | (codepoint & 0x3F))); // bits 0-5 } else { escaped.append(escapeAsUtf8(0xF0 | (codepoint >> 18))); // upper bits escaped.append(escapeAsUtf8(0x80 | ((codepoint >> 12) & 0x3F))); // bits 12-17 escaped.append(escapeAsUtf8(0x80 | ((codepoint >> 6) & 0x3F))); // bits 6-11 escaped.append(escapeAsUtf8(0x80 | (codepoint & 0x3F))); // bits 0-5 } }); return escaped.toString(); } public static String escapeAsUtf8(int c) { return escape(c, /* forUtf8= */ true); } /** Produce a readable encoding of a byte in a String. */ private static String escape(int c, boolean forUtf8) { switch (c) { case 0x09: // tab return "\\t"; case 0x0A: // newline return "\\n"; case 0x0D: // return return "\\r"; case 0x22: // " return "\\\""; case 0x27: // ' return "\\'"; case 0x5c: // \ return "\\\\"; default: // fall out } // The rest of the ascii range characters do not need escaping in either representation. if (c >= 0x20 && c < 0x7F) { return String.valueOf((char) c); } if (forUtf8) { checkArgument(c >= 0 && c <= 0xFF); return String.format("\\%02X", (byte) c); } else { return String.format("\\u%04X", c); } } private StringUtils() {} }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy