com.google.errorprone.bugpatterns.UnicodeEscape Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of error_prone_core Show documentation
There is a newer version: 2.28.0
/*
 * Copyright 2021 The Error Prone Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.errorprone.bugpatterns;

import static com.google.errorprone.BugPattern.SeverityLevel.WARNING;
import static com.google.errorprone.matchers.Description.NO_MATCH;

import com.google.errorprone.BugPattern;
import com.google.errorprone.VisitorState;
import com.google.errorprone.bugpatterns.BugChecker.CompilationUnitTreeMatcher;
import com.google.errorprone.fixes.FixedPosition;
import com.google.errorprone.fixes.SuggestedFix;
import com.google.errorprone.matchers.Description;
import com.sun.source.tree.CompilationUnitTree;

/** Replaces printable ASCII unicode escapes with the literal version. */
@BugPattern(
    name = "UnicodeEscape",
    summary =
        "Using unicode escape sequences for printable ASCII characters is obfuscated, and"
            + " potentially dangerous.",
    severity = WARNING)
public final class UnicodeEscape extends BugChecker implements CompilationUnitTreeMatcher {
  @Override
  public Description matchCompilationUnit(CompilationUnitTree tree, VisitorState state) {
    new UnicodeScanner(state.getSourceCode().toString(), state).scan();
    return NO_MATCH;
  }

  private final class UnicodeScanner {
    private final String source;
    private final VisitorState state;

    private int position = 0;
    private char currentCharacter = 0;
    private boolean isUnicode = false;
    private int lastBackslash = 0;

    private UnicodeScanner(String source, VisitorState state) {
      this.source = source;
      this.state = state;
      this.currentCharacter = source.charAt(0);
    }

    public void scan() {
      for (; position < source.length(); processCharacter()) {
        if (isUnicode && isBanned(currentCharacter)) {
          if (currentCharacter == '\\' && peek() == 'u') {
            continue;
          }
          state.reportMatch(
              describeMatch(
                  new FixedPosition(state.getPath().getCompilationUnit(), position),
                  SuggestedFix.replace(
                      lastBackslash, position + 1, Character.toString(currentCharacter))));
        }
      }
    }

    private void processCharacter() {
      if (currentCharacter == '\\') {
        lastBackslash = position;
        nextCharacter();
        // The isUnicode check is important because the Unicode escape for backslash can escape any
        // subsequent escape code... except "u".
        if (currentCharacter == 'u' && !isUnicode) {
          // The spec allows multiple "u" after the "\".
          do {
            nextCharacter();
          } while (currentCharacter == 'u');
          currentCharacter = (char) Integer.parseInt(source.substring(position, position + 4), 16);
          position += 3;
          isUnicode = true;
          return;
        }
        // If it's not a Unicode escape, we don't care about what character it actually encodes,
        // so let's just skip the char.
      }
      nextCharacter();
      isUnicode = false;
    }

    private void nextCharacter() {
      ++position;
      if (position < source.length()) {
        currentCharacter = source.charAt(position);
      }
    }

    /** Returns the next character, or {@code 0} if we're at the end of the file. */
    private char peek() {
      return position + 1 < source.length() ? source.charAt(position + 1) : 0;
    }
  }

  private static boolean isBanned(char c) {
    return (c >= 0x20 && c <= 0x7E) || c == 0xA || c == 0xD;
  }
}