com.google.j2cl.common.StringUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of common Show documentation
The newest version!
/*
 * Copyright 2022 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.j2cl.common;

import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Character.digit;
import static java.util.stream.Collectors.joining;

import java.util.Locale;

/** Utilities to produce Strings in code. */
public final class StringUtils {

  /**
   * Returns the string with its first {@code char} upper-cased.
   *
   * <p>The conversion uses {@link Locale#ROOT} so that the result does not depend on the default
   * locale of the JVM (e.g. under a Turkish locale {@code "i"} would otherwise become {@code
   * "İ"}).
   *
   * @param string the string to capitalize; an empty string is returned unchanged
   * @return {@code string} with its first character upper-cased
   */
  public static String capitalize(String string) {
    if (string.isEmpty()) {
      return string;
    }
    return string.substring(0, 1).toUpperCase(Locale.ROOT) + string.substring(1);
  }

  /**
   * Unescapes properly escaped Wtf16 strings.
   *
   * <p>Recognized escape sequences are {@code \t}, {@code \n}, {@code \r}, {@code \"}, {@code \'},
   * {@code \\} and {@code \}{@code uXXXX} (exactly four hexadecimal digits). Every other character
   * is copied through unchanged.
   *
   * <p>Note: meant to be used as the inverse of escapeAsWtf16().
   *
   * @param string the escaped string
   * @return the unescaped string
   * @throws InternalCompilerError if the string contains an unknown, malformed or truncated escape
   *     sequence
   */
  public static String unescapeWtf16(String string) {
    StringBuilder unescapedStringBuilder = new StringBuilder(string.length());
    char[] charArray = string.toCharArray();
    for (int i = 0; i < charArray.length; i++) {
      char c = charArray[i];
      if (c != '\\') {
        unescapedStringBuilder.append(c);
        continue;
      }

      // Escape sequence: advance to the character that selects the escape.
      i++;
      if (i >= charArray.length) {
        // A lone trailing backslash is malformed; report it like any other bad escape instead of
        // failing with an ArrayIndexOutOfBoundsException.
        throw new InternalCompilerError("Bad escaping " + string);
      }
      switch (charArray[i]) {
        case 't':
          unescapedStringBuilder.append('\t');
          break;
        case 'n': // newline
          unescapedStringBuilder.append('\n');
          break;
        case 'r':
          unescapedStringBuilder.append('\r');
          break;
        case '"':
          unescapedStringBuilder.append('\"');
          break;
        case '\'':
          unescapedStringBuilder.append('\'');
          break;
        case '\\':
          unescapedStringBuilder.append('\\');
          break;
        case 'u':
          unescapedStringBuilder.append(unescapeUnicode(charArray, i));
          // Skip the four hex digits consumed by unescapeUnicode.
          i += 4;
          break;
        default:
          throw new InternalCompilerError("Bad escaping " + string);
      }
    }
    return unescapedStringBuilder.toString();
  }

  /**
   * Decodes the four hexadecimal digits that follow the {@code u} of a unicode escape.
   *
   * @param charArray the characters of the escaped string
   * @param i the index of the {@code u} character; the digits are read from {@code i + 1} to
   *     {@code i + 4}
   * @return the 16-bit code unit denoted by the digits
   * @throws InternalCompilerError if fewer than four characters follow or any of them is not a
   *     hexadecimal digit
   */
  private static char unescapeUnicode(char[] charArray, int i) {
    if (i + 4 >= charArray.length) {
      // Truncated escape: fewer than four characters are left after the 'u'.
      throw new InternalCompilerError("Bad escaping " + new String(charArray));
    }

    char value = 0;
    for (int j = 0; j < 4; j++) {
      int digit = digit(charArray[++i], 16);
      if (digit < 0) {
        throw new InternalCompilerError("Bad escaping " + new String(charArray));
      }
      value = (char) (value * 16 + digit);
    }
    return value;
  }

  /**
   * Escapes every 16-bit code unit of the string so that it can be emitted as a WTF-16 string
   * literal.
   *
   * @param string the string to escape
   * @return the concatenation of the escaped form of each {@code char} of {@code string}
   */
  public static String escapeAsWtf16(String string) {
    // The chars in the CharSequence are already in WTF16. Hence iterate over the 16 bits chars
    // and decide how to encode in the string.
    return string.chars().mapToObj(StringUtils::escapeAsWtf16).collect(joining());
  }

  /**
   * Escapes a single code unit for a WTF-16 string literal.
   *
   * <p>Printable ASCII is emitted as is, a few well-known characters get a short backslash escape
   * and everything else is emitted as a backslash-u escape with at least four upper-case
   * hexadecimal digits. The value is not range checked here.
   *
   * @param c the code unit to escape
   * @return the escaped representation of {@code c}
   */
  public static String escapeAsWtf16(int c) {
    return escape(c, /* forUtf8= */ false);
  }

  /**
   * Converts a potentially ill-formed UTF-16 string (WTF-16) into a UTF-8 string literal.
   *
   * <p>The string is traversed by code point; each code point is split into its one to four UTF-8
   * bytes and each byte is escaped individually. Unpaired surrogates are reported by {@link
   * String#codePoints()} as their own value and therefore take the three byte form.
   *
   * @param string the string to escape
   * @return the escaped UTF-8 byte sequence of {@code string}
   */
  public static String escapeAsUtf8(String string) {
    StringBuilder escaped = new StringBuilder();
    string
        .codePoints()
        .forEach(
            codepoint -> {
              if (codepoint < 0x80) {
                escaped.append(escapeAsUtf8(codepoint));
              } else if (codepoint < 0x800) {
                escaped.append(escapeAsUtf8(0xC0 | (codepoint >> 6))); // upper bits
                escaped.append(escapeAsUtf8(0x80 | (codepoint & 0x3F))); // bits 0-5
              } else if (codepoint < 0x10000) {
                escaped.append(escapeAsUtf8(0xE0 | (codepoint >> 12))); // upper bits
                escaped.append(escapeAsUtf8(0x80 | ((codepoint >> 6) & 0x3F))); // bits 6-11
                escaped.append(escapeAsUtf8(0x80 | (codepoint & 0x3F))); // bits 0-5
              } else {
                escaped.append(escapeAsUtf8(0xF0 | (codepoint >> 18))); // upper bits
                escaped.append(escapeAsUtf8(0x80 | ((codepoint >> 12) & 0x3F))); // bits 12-17
                escaped.append(escapeAsUtf8(0x80 | ((codepoint >> 6) & 0x3F))); // bits 6-11
                escaped.append(escapeAsUtf8(0x80 | (codepoint & 0x3F))); // bits 0-5
              }
            });
    return escaped.toString();
  }

  /**
   * Escapes a single byte of a UTF-8 encoded string literal.
   *
   * <p>Printable ASCII is emitted as is, a few well-known characters get a short backslash escape
   * and every other byte is emitted as a backslash followed by two upper-case hexadecimal digits.
   *
   * @param c the byte value to escape, in the range {@code 0} to {@code 0xFF}
   * @return the escaped representation of {@code c}
   * @throws IllegalArgumentException if {@code c} needs a hexadecimal escape and is outside of the
   *     range {@code 0} to {@code 0xFF}
   */
  public static String escapeAsUtf8(int c) {
    return escape(c, /* forUtf8= */ true);
  }

  /**
   * Produce a readable encoding of a byte in a String.
   *
   * @param c the value to escape
   * @param forUtf8 whether to use the two digit byte escape (UTF-8 literals) rather than the
   *     backslash-u escape (WTF-16 literals) for values that have no shorter representation
   * @return the escaped representation of {@code c}
   */
  private static String escape(int c, boolean forUtf8) {
    switch (c) {
      case 0x09: // tab
        return "\\t";
      case 0x0A: // newline
        return "\\n";
      case 0x0D: // return
        return "\\r";
      case 0x22: // "
        return "\\\"";
      case 0x27: // '
        return "\\'";
      case 0x5c: // \
        return "\\\\";
      default: // fall out
    }

    // The rest of the ascii range characters do not need escaping in either representation.
    if (c >= 0x20 && c < 0x7F) {
      return String.valueOf((char) c);
    }

    if (forUtf8) {
      checkArgument(c >= 0 && c <= 0xFF);
      // A negative Byte is formatted by %X as its unsigned value, so bytes >= 0x80 still print as
      // two hex digits.
      return String.format("\\%02X", (byte) c);
    } else {
      return String.format("\\u%04X", c);
    }
  }

  private StringUtils() {}
}