All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.javascript.jscomp.parsing.parser.Scanner Maven / Gradle / Ivy

/*
 * Copyright 2011 The Closure Compiler Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.javascript.jscomp.parsing.parser;

import com.google.errorprone.annotations.FormatMethod;
import com.google.errorprone.annotations.FormatString;
import com.google.javascript.jscomp.parsing.parser.trees.Comment;
import com.google.javascript.jscomp.parsing.parser.util.ErrorReporter;
import com.google.javascript.jscomp.parsing.parser.util.SourcePosition;
import com.google.javascript.jscomp.parsing.parser.util.SourceRange;
import java.util.ArrayList;
import javax.annotation.Nullable;

/**
 * Scans javascript source code into tokens. All entrypoints assume the caller is not expecting a
 * regular expression literal except for nextRegularExpressionLiteralToken.
 *
 * 

7 Lexical Conventions */ public class Scanner { private final boolean parseTypeSyntax; private final ErrorReporter errorReporter; private final SourceFile source; private final LineNumberScanner lineNumberScanner; private final String contents; private final int contentsLength; private final ArrayList currentTokens = new ArrayList<>(); private int index; private final CommentRecorder commentRecorder; private int typeParameterLevel; public Scanner( boolean parseTypeSyntax, ErrorReporter errorReporter, CommentRecorder commentRecorder, SourceFile source) { this(parseTypeSyntax, errorReporter, commentRecorder, source, 0); } public Scanner( boolean parseTypeSyntax, ErrorReporter errorReporter, CommentRecorder commentRecorder, SourceFile file, int offset) { this.parseTypeSyntax = parseTypeSyntax; this.errorReporter = errorReporter; this.commentRecorder = commentRecorder; this.source = file; this.lineNumberScanner = new LineNumberScanner(source); // To help reason about the expected JVM performance unwrap "file" values. // The scanner is key to the parsing speed. this.contents = file.contents; this.contentsLength = file.contents.length(); this.index = offset; this.typeParameterLevel = 0; } public interface CommentRecorder { void recordComment(Comment.Type type, SourceRange range, String value); } public SourceFile getFile() { return source; } public int getOffset() { return currentTokens.isEmpty() ? index : peekToken().location.start.offset; } public void setPosition(SourcePosition position) { lineNumberScanner.rewindTo(position); currentTokens.clear(); this.index = position.offset; } public SourcePosition getPosition() { return currentTokens.isEmpty() ? getPosition(index) : peekToken().location.start; } private SourcePosition getPosition(int offset) { return lineNumberScanner.getSourcePosition(offset); } private SourceRange getTokenRange(int startOffset) { return lineNumberScanner.getSourceRange(startOffset, index); } /** Prefer this to {@link #getTokenRange(int)} when the token might span multiple lines. */ private SourceRange getTokenRange(SourcePosition position) { lineNumberScanner.rewindTo(position); return lineNumberScanner.getSourceRange(position.offset, index); } public Token nextToken() { peekToken(); return currentTokens.remove(0); } private void clearTokenLookahead() { if (!currentTokens.isEmpty()) { setPosition(peekToken().location.start); } } public LiteralToken nextRegularExpressionLiteralToken() { clearTokenLookahead(); int beginToken = index; // leading '/' nextChar(); // body if (!skipRegularExpressionBody()) { return new LiteralToken( TokenType.REGULAR_EXPRESSION, getTokenString(beginToken), getTokenRange(beginToken)); } // separating '/' if (peekChar() != '/') { reportError("Expected '/' in regular expression literal"); return new LiteralToken( TokenType.REGULAR_EXPRESSION, getTokenString(beginToken), getTokenRange(beginToken)); } nextChar(); // flags while (isIdentifierPart(peekChar())) { nextChar(); } return new LiteralToken( TokenType.REGULAR_EXPRESSION, getTokenString(beginToken), getTokenRange(beginToken)); } public TemplateLiteralToken nextTemplateLiteralToken() { Token token = nextToken(); if (isAtEnd() || token.type != TokenType.CLOSE_CURLY) { reportError(getPosition(index), "Expected '}' after expression in template literal"); } return nextTemplateLiteralTokenShared(TokenType.TEMPLATE_TAIL, TokenType.TEMPLATE_MIDDLE); } private boolean skipRegularExpressionBody() { if (!isRegularExpressionFirstChar(peekChar())) { reportError("Expected regular expression first char"); return false; } if (!skipRegularExpressionChar()) { return false; } while (!isAtEnd() && isRegularExpressionChar(peekChar())) { if (!skipRegularExpressionChar()) { return false; } } return true; } private boolean skipRegularExpressionChar() { switch (peekChar()) { case '\\': return skipRegularExpressionBackslashSequence(); case '[': return skipRegularExpressionClass(); default: nextChar(); return true; } } private boolean skipRegularExpressionBackslashSequence() { // TODO(tbreisacher): Warn if this is an unnecessary escape, like we do for string literals. nextChar(); if (isLineTerminator(peekChar())) { reportError("New line not allowed in regular expression literal"); return false; } nextChar(); return true; } private boolean skipRegularExpressionClass() { nextChar(); while (!isAtEnd() && peekRegularExpressionClassChar()) { if (!skipRegularExpressionClassChar()) { return false; } } if (peekChar() != ']') { reportError("']' expected"); return false; } nextChar(); return true; } private boolean peekRegularExpressionClassChar() { return peekChar() != ']' && !isLineTerminator(peekChar()); } private boolean skipRegularExpressionClassChar() { if (peek('\\')) { return skipRegularExpressionBackslashSequence(); } nextChar(); return true; } private static boolean isRegularExpressionFirstChar(char ch) { return isRegularExpressionChar(ch) && ch != '*'; } private static boolean isRegularExpressionChar(char ch) { switch (ch) { case '/': return false; case '\\': case '[': return true; default: return !isLineTerminator(ch); } } public Token peekToken() { return peekToken(0); } public Token peekToken(int index) { while (currentTokens.size() <= index) { currentTokens.add(scanToken()); } return currentTokens.get(index); } private boolean isAtEnd() { return !isValidIndex(index); } private boolean isValidIndex(int index) { return index >= 0 & index < contentsLength; } // 7.2 White Space /** Returns true if the whitespace that was skipped included any line terminators. */ private boolean skipWhitespace() { boolean foundLineTerminator = false; while (!isAtEnd() && peekWhitespace()) { if (isLineTerminator(nextChar())) { foundLineTerminator = true; } } return foundLineTerminator; } private boolean peekWhitespace() { return isWhitespace(peekChar()); } private static boolean isWhitespace(char ch) { switch (ch) { case '\u0009': // Tab case '\u000B': // Vertical Tab case '\u000C': // Form Feed case '\u0020': // Space case '\u00A0': // No-break space case '\uFEFF': // Byte Order Mark case '\n': // Line Feed case '\r': // Carriage Return case '\u2028': // Line Separator case '\u2029': // Paragraph Separator case '\u3000': // Ideographic Space // TODO: there are other Unicode Category 'Zs' chars that should go here. return true; default: return false; } } // 7.3 Line Terminators private static boolean isLineTerminator(char ch) { switch (ch) { case '\n': // Line Feed case '\r': // Carriage Return case '\u2028': // Line Separator case '\u2029': // Paragraph Separator return true; default: return false; } } // Allow line separator and paragraph separator in string literals. // https://github.com/tc39/proposal-json-superset private static boolean isStringLineTerminator(char ch) { switch (ch) { case '\u2028': // Line Separator case '\u2029': // Paragraph Separator return false; default: return isLineTerminator(ch); } } // 7.4 Comments private void skipComments() { while (skipComment()) {} } private boolean skipComment() { boolean isStartOfLine = skipWhitespace(); if (!isAtEnd()) { switch (peekChar(0)) { case '/': switch (peekChar(1)) { case '/': skipSingleLineComment(); return true; case '*': skipMultiLineComment(); return true; default: // fall out } break; case '<': // Check if this is the start of an HTML comment (""). // Note that the spec does not require us to check for this case, // but there is some legacy code that depends on this behavior. if (isStartOfLine && peekChar(1) == '-' && peekChar(2) == '>') { reportHtmlCommentWarning(); skipSingleLineComment(); return true; } break; case '#': if (index == 0 && peekChar(1) == '!') { skipSingleLineComment(Comment.Type.SHEBANG); return true; } break; default: // fall out } } return false; } private void reportHtmlCommentWarning() { reportWarning( "In some cases, '' are treated as a '//' " + "for legacy reasons. Removing this from your code is " + "safe for all browsers currently in use."); } private void skipSingleLineComment() { skipSingleLineComment(Comment.Type.LINE); } private void skipSingleLineComment(Comment.Type type) { int startOffset = index; while (!isAtEnd() && !isLineTerminator(peekChar())) { nextChar(); } SourceRange range = lineNumberScanner.getSourceRange(startOffset, index); String value = this.contents.substring(startOffset, index); recordComment(type, range, value); } private void recordComment(Comment.Type type, SourceRange range, String value) { commentRecorder.recordComment(type, range, value); } private void skipMultiLineComment() { int startOffset = index; nextChar(); // '/' nextChar(); // '*' while (!isAtEnd() && (peekChar() != '*' || peekChar(1) != '/')) { nextChar(); } if (!isAtEnd()) { nextChar(); nextChar(); Comment.Type type = Comment.Type.BLOCK; if (index - startOffset > 4) { if (this.contents.charAt(startOffset + 2) == '*') { type = Comment.Type.JSDOC; } else if (this.contents.charAt(startOffset + 2) == '!') { type = Comment.Type.IMPORTANT; } } SourceRange range = lineNumberScanner.getSourceRange(startOffset, index); String value = this.contents.substring(startOffset, index); recordComment(type, range, value); } else { reportError("unterminated comment"); } } private Token scanToken() { skipComments(); int beginToken = index; if (isAtEnd()) { return createToken(TokenType.END_OF_FILE, beginToken); } char ch = nextChar(); switch (ch) { case '{': return createToken(TokenType.OPEN_CURLY, beginToken); case '}': return createToken(TokenType.CLOSE_CURLY, beginToken); case '(': return createToken(TokenType.OPEN_PAREN, beginToken); case ')': return createToken(TokenType.CLOSE_PAREN, beginToken); case '[': return createToken(TokenType.OPEN_SQUARE, beginToken); case ']': return createToken(TokenType.CLOSE_SQUARE, beginToken); case '.': if (isDecimalDigit(peekChar())) { return scanNumberPostPeriod(beginToken); } // Harmony spread operator if (peek('.') && peekChar(1) == '.') { nextChar(); nextChar(); return createToken(TokenType.ELLIPSIS, beginToken); } return createToken(TokenType.PERIOD, beginToken); case ';': return createToken(TokenType.SEMI_COLON, beginToken); case ',': return createToken(TokenType.COMMA, beginToken); case '~': return createToken(TokenType.TILDE, beginToken); case '?': if (peek('?')) { // see ?? nextChar(); return createToken(TokenType.QUESTION_QUESTION, beginToken); } if (peek('.')) { // see ?. if (!isDecimalDigit(peekChar(1))) { nextChar(); // a?.1:2 should be a ? 0.1 : 2 not a ?. 1 : 2 (syntax error) return createToken(TokenType.QUESTION_DOT, beginToken); } } return createToken(TokenType.QUESTION, beginToken); case ':': return createToken(TokenType.COLON, beginToken); case '<': switch (peekChar()) { case '<': nextChar(); if (peek('=')) { nextChar(); return createToken(TokenType.LEFT_SHIFT_EQUAL, beginToken); } return createToken(TokenType.LEFT_SHIFT, beginToken); case '=': nextChar(); return createToken(TokenType.LESS_EQUAL, beginToken); default: return createToken(TokenType.OPEN_ANGLE, beginToken); } case '>': if (typeParameterLevel > 0) { return createToken(TokenType.CLOSE_ANGLE, beginToken); } switch (peekChar()) { case '>': nextChar(); switch (peekChar()) { case '=': nextChar(); return createToken(TokenType.RIGHT_SHIFT_EQUAL, beginToken); case '>': nextChar(); if (peek('=')) { nextChar(); return createToken(TokenType.UNSIGNED_RIGHT_SHIFT_EQUAL, beginToken); } return createToken(TokenType.UNSIGNED_RIGHT_SHIFT, beginToken); default: return createToken(TokenType.RIGHT_SHIFT, beginToken); } case '=': nextChar(); return createToken(TokenType.GREATER_EQUAL, beginToken); default: return createToken(TokenType.CLOSE_ANGLE, beginToken); } case '=': switch (peekChar()) { case '=': nextChar(); if (peek('=')) { nextChar(); return createToken(TokenType.EQUAL_EQUAL_EQUAL, beginToken); } return createToken(TokenType.EQUAL_EQUAL, beginToken); case '>': nextChar(); return createToken(TokenType.ARROW, beginToken); default: return createToken(TokenType.EQUAL, beginToken); } case '!': if (peek('=')) { nextChar(); if (peek('=')) { nextChar(); return createToken(TokenType.NOT_EQUAL_EQUAL, beginToken); } return createToken(TokenType.NOT_EQUAL, beginToken); } return createToken(TokenType.BANG, beginToken); case '*': if (peek('=')) { nextChar(); return createToken(TokenType.STAR_EQUAL, beginToken); } else if (peek('*')) { nextChar(); // '**' seen so far if (peek('=')) { nextChar(); return createToken(TokenType.STAR_STAR_EQUAL, beginToken); } else { return createToken(TokenType.STAR_STAR, beginToken); } } return createToken(TokenType.STAR, beginToken); case '%': if (peek('=')) { nextChar(); return createToken(TokenType.PERCENT_EQUAL, beginToken); } return createToken(TokenType.PERCENT, beginToken); case '^': if (peek('=')) { nextChar(); return createToken(TokenType.CARET_EQUAL, beginToken); } return createToken(TokenType.CARET, beginToken); case '/': if (peek('=')) { nextChar(); return createToken(TokenType.SLASH_EQUAL, beginToken); } return createToken(TokenType.SLASH, beginToken); case '+': switch (peekChar()) { case '+': nextChar(); return createToken(TokenType.PLUS_PLUS, beginToken); case '=': nextChar(); return createToken(TokenType.PLUS_EQUAL, beginToken); default: return createToken(TokenType.PLUS, beginToken); } case '-': switch (peekChar()) { case '-': nextChar(); return createToken(TokenType.MINUS_MINUS, beginToken); case '=': nextChar(); return createToken(TokenType.MINUS_EQUAL, beginToken); default: return createToken(TokenType.MINUS, beginToken); } case '&': switch (peekChar()) { case '&': nextChar(); return createToken(TokenType.AND, beginToken); case '=': nextChar(); return createToken(TokenType.AMPERSAND_EQUAL, beginToken); default: return createToken(TokenType.AMPERSAND, beginToken); } case '|': switch (peekChar()) { case '|': nextChar(); return createToken(TokenType.OR, beginToken); case '=': nextChar(); return createToken(TokenType.BAR_EQUAL, beginToken); default: return createToken(TokenType.BAR, beginToken); } case '#': return createToken(TokenType.POUND, beginToken); // TODO: add NumberToken // TODO: character following NumericLiteral must not be an IdentifierStart or DecimalDigit case '0': return scanPostZero(beginToken); case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return scanPostDigit(beginToken); case '"': case '\'': return scanStringLiteral(beginToken, ch); case '`': return scanTemplateLiteral(beginToken); default: return scanIdentifierOrKeyword(beginToken, ch); } } private Token scanNumberPostPeriod(int beginToken) { skipDecimalDigits(); return scanExponentOfNumericLiteral(beginToken); } private Token scanPostDigit(int beginToken) { skipDecimalDigits(); if (peek('n')) { nextChar(); return new LiteralToken( TokenType.BIGINT, getTokenString(beginToken), getTokenRange(beginToken)); } return scanFractionalNumericLiteral(beginToken); } private Token scanPostZero(int beginToken) { switch (peekChar()) { case 'b': case 'B': // binary nextChar(); if (!isBinaryDigit(peekChar())) { reportError("Binary Integer Literal must contain at least one digit"); } skipBinaryDigits(); boolean isBigInt = peek('n'); if (isBigInt) { nextChar(); } return new LiteralToken( isBigInt ? TokenType.BIGINT : TokenType.NUMBER, getTokenString(beginToken), getTokenRange(beginToken)); case 'o': case 'O': // octal nextChar(); if (!isOctalDigit(peekChar())) { reportError("Octal Integer Literal must contain at least one digit"); } skipOctalDigits(); if (peek('8') || peek('9')) { reportError("Invalid octal digit in octal literal."); } isBigInt = peek('n'); if (isBigInt) { nextChar(); } return new LiteralToken( isBigInt ? TokenType.BIGINT : TokenType.NUMBER, getTokenString(beginToken), getTokenRange(beginToken)); case 'x': case 'X': nextChar(); if (!peekHexDigit()) { reportError("Hex Integer Literal must contain at least one digit"); } skipHexDigits(); isBigInt = peek('n'); if (isBigInt) { nextChar(); } return new LiteralToken( isBigInt ? TokenType.BIGINT : TokenType.NUMBER, getTokenString(beginToken), getTokenRange(beginToken)); case 'e': case 'E': return scanExponentOfNumericLiteral(beginToken); case '.': return scanFractionalNumericLiteral(beginToken); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': skipDecimalDigits(); if (peek('.')) { nextChar(); skipDecimalDigits(); } if (peek('n')) { reportError("SyntaxError: nonzero BigInt can't have leading zero"); } return new LiteralToken( TokenType.NUMBER, getTokenString(beginToken), getTokenRange(beginToken)); case 'n': nextChar(); return new LiteralToken( TokenType.BIGINT, getTokenString(beginToken), getTokenRange(beginToken)); default: return new LiteralToken( TokenType.NUMBER, getTokenString(beginToken), getTokenRange(beginToken)); } } private Token createToken(TokenType type, int beginToken) { return new Token(type, getTokenRange(beginToken)); } private Token scanIdentifierOrKeyword(int beginToken, char ch) { // NOTE: This code previously used a StringBuilder to collect the characters of the identifier // or keyword. Recording the staring position and using contents.substring() below instead was // found to eliminate 1.84% of all JVM "frequently collected garbage" in the compilation of a // large project. int valueStartIndex = index - 1; boolean containsUnicodeEscape = ch == '\\'; boolean bracedUnicodeEscape = false; int unicodeEscapeLen = containsUnicodeEscape ? 1 : 0; ch = peekChar(); while (isIdentifierPart(ch) || ch == '\\' || (ch == '{' && unicodeEscapeLen == 2) || (ch == '}' && bracedUnicodeEscape)) { if (ch == '\\') { containsUnicodeEscape = true; } // Update length of current Unicode escape. if (ch == '\\' || unicodeEscapeLen > 0) { unicodeEscapeLen++; } // Enter Unicode point escape. if (ch == '{') { bracedUnicodeEscape = true; } // Exit Unicode escape if (ch == '}' || (unicodeEscapeLen >= 6 && !bracedUnicodeEscape)) { bracedUnicodeEscape = false; unicodeEscapeLen = 0; } // Add character to token nextChar(); ch = peekChar(); } String value = contents.substring(valueStartIndex, index); // Process unicode escapes. if (containsUnicodeEscape) { value = processUnicodeEscapes(value); if (value == null) { reportError(getPosition(index), "Invalid escape sequence"); return createToken(TokenType.ERROR, beginToken); } } // Check to make sure the first character (or the unicode escape at the // beginning of the identifier) is a valid identifier start character. char start = value.charAt(0); if (!isIdentifierStart(start)) { reportError( getPosition(beginToken), "Character '%c' (U+%04X) is not a valid identifier start char", start, (int) start); return createToken(TokenType.ERROR, beginToken); } Keywords k = Keywords.get(value, parseTypeSyntax); if (k != null) { return new Token(k.type, getTokenRange(beginToken)); } return new IdentifierToken(getTokenRange(beginToken), value); } /** * Converts unicode escapes in the given string to the equivalent unicode character. If there are * no escapes, returns the input unchanged. If there is an invalid escape sequence, returns null. */ private static String processUnicodeEscapes(String value) { while (value.contains("\\")) { int escapeStart = value.indexOf('\\'); try { if (value.charAt(escapeStart + 1) != 'u') { return null; } String hexDigits; int escapeEnd; if (value.charAt(escapeStart + 2) != '{') { // Simple escape with exactly four hex digits: \\uXXXX escapeEnd = escapeStart + 6; // TODO(b/155480859): Don't trust String#substring to throw on out of bounds. J2CL // implements it incorrectly. if (escapeEnd > value.length()) { return null; } hexDigits = value.substring(escapeStart + 2, escapeEnd); } else { // Escape with braces can have any number of hex digits: \\u{XXXXXXX} escapeEnd = escapeStart + 3; while (isHexDigit(value.charAt(escapeEnd))) { escapeEnd++; } if (value.charAt(escapeEnd) != '}') { return null; } hexDigits = value.substring(escapeStart + 3, escapeEnd); escapeEnd++; } // TODO(mattloring): Allow code points >= 0xFFFF (greater than the size of a char). char ch = (char) Integer.parseInt(hexDigits, 0x10); if (!isIdentifierPart(ch)) { return null; } value = value.substring(0, escapeStart) + ch + value.substring(escapeEnd); } catch (NumberFormatException | StringIndexOutOfBoundsException e) { return null; } } return value; } @SuppressWarnings("ShortCircuitBoolean") // Intentional to minimize branches in this code private static boolean isIdentifierStart(char ch) { // Most code is written in pure ASCII, so create a fast path here. if (ch <= 127) { // Intentionally avoiding short circuiting behavior of "||" and "&&". // This minimizes branches in this code which minimizes branch prediction misses. return ((ch >= 'A' & ch <= 'Z') | (ch >= 'a' & ch <= 'z') | (ch == '_' | ch == '$')); } // Handle non-ASCII characters. // TODO(tjgq): This should include all characters with the ID_Start property. if (Character.isLetter(ch)) { return true; } // Workaround for b/36459436. // When running under GWT/J2CL, Character.isLetter only handles ASCII. // Angular relies heavily on Latin Small Letter Barred O and Greek Capital Letter Delta. // Greek letters are occasionally found in math code. // Latin letters are found in our own tests. return (ch >= 0x00C0 & ch <= 0x00D6) // Latin letters // 0x00D7 = multiplication sign, not a letter | (ch >= 0x00D8 & ch <= 0x00F6) // Latin letters // 0x00F7 = division sign, not a letter | (ch >= 0x00F8 & ch <= 0x00FF) // Latin letters | ch == 0x0275 // Latin Barred O | (ch >= 0x0391 & ch <= 0x03A1) // Greek uppercase letters // 0x03A2 = unassigned | (ch >= 0x03A3 & ch <= 0x03A9) // Remaining Greek uppercase letters | (ch >= 0x03B1 & ch <= 0x03C9); // Greek lowercase letters } @SuppressWarnings("ShortCircuitBoolean") // Intentional to minimize branches in this code private static boolean isIdentifierPart(char ch) { // Most code is written in pure ASCII, so create a fast path here. if (ch <= 127) { return ((ch >= 'A' & ch <= 'Z') | (ch >= 'a' & ch <= 'z') | (ch >= '0' & ch <= '9') | (ch == '_' | ch == '$')); } // Handle non-ASCII characters. // TODO(tjgq): This should include all characters with the ID_Continue property, plus // Zero Width Non-Joiner and Zero Width Joiner. return isIdentifierStart(ch) || Character.isDigit(ch); } private Token scanStringLiteral(int beginIndex, char terminator) { // String literals might span multiple lines. SourcePosition startingPosition = getPosition(beginIndex); boolean hasUnescapedUnicodeLineOrParagraphSeparator = false; while (peekStringLiteralChar(terminator)) { char c = peekChar(); hasUnescapedUnicodeLineOrParagraphSeparator = hasUnescapedUnicodeLineOrParagraphSeparator || c == '\u2028' || c == '\u2029'; if (!skipStringLiteralChar()) { return new StringLiteralToken( getTokenString(beginIndex), getTokenRange(startingPosition), hasUnescapedUnicodeLineOrParagraphSeparator); } } if (peekChar() != terminator) { reportError(getPosition(beginIndex), "Unterminated string literal"); } else { nextChar(); } return new StringLiteralToken( getTokenString(beginIndex), getTokenRange(startingPosition), hasUnescapedUnicodeLineOrParagraphSeparator); } private Token scanTemplateLiteral(int beginIndex) { if (isAtEnd()) { reportError(getPosition(beginIndex), "Unterminated template literal"); } return nextTemplateLiteralTokenShared( TokenType.NO_SUBSTITUTION_TEMPLATE, TokenType.TEMPLATE_HEAD); } private TemplateLiteralToken nextTemplateLiteralTokenShared( TokenType endType, TokenType middleType) { int beginIndex = index; SkipTemplateCharactersResult skipTemplateCharactersResult = skipTemplateCharacters(); if (isAtEnd()) { reportError(getPosition(beginIndex), "Unterminated template literal"); } String value = getTokenString(beginIndex); switch (peekChar()) { case '`': nextChar(); return new TemplateLiteralToken( endType, value, skipTemplateCharactersResult.getErrorMessage(), skipTemplateCharactersResult.getPosition(), getTokenRange(beginIndex - 1)); case '$': nextChar(); // $ nextChar(); // { return new TemplateLiteralToken( middleType, value, skipTemplateCharactersResult.getErrorMessage(), skipTemplateCharactersResult.getPosition(), getTokenRange(beginIndex - 1)); default: // Should have reported error already return new TemplateLiteralToken( endType, value, skipTemplateCharactersResult.getErrorMessage(), skipTemplateCharactersResult.getPosition(), getTokenRange(beginIndex - 1)); } } private String getTokenString(int beginIndex) { return this.contents.substring(beginIndex, index); } private boolean peekStringLiteralChar(char terminator) { return !isAtEnd() && peekChar() != terminator && !isStringLineTerminator(peekChar()); } private boolean skipStringLiteralChar() { if (peek('\\')) { return skipStringLiteralEscapeSequence(); } nextChar(); return true; } private SkipTemplateCharactersResult skipTemplateCharacters() { SkipTemplateCharactersResult result = createSkipTemplateCharactersResult(null); while (!isAtEnd()) { switch (peekChar()) { case '`': return result; case '\\': // There might be multiple errors. Take the first one but continue scanning SkipTemplateCharactersResult newError = skipTemplateLiteralEscapeSequence(); if (newError != null && !result.hasError()) { result = newError; } break; case '$': if (peekChar(1) == '{') { return result; } // Fall through. default: nextChar(); } } return result; } @SuppressWarnings("IdentityBinaryExpression") // for "skipHexDigit() && skipHexDigit()" private SkipTemplateCharactersResult skipTemplateLiteralEscapeSequence() { nextChar(); if (isAtEnd()) { reportError("Unterminated template literal escape sequence"); return null; } if (isLineTerminator(peekChar())) { skipLineTerminator(); return null; } char next = nextChar(); switch (next) { case '0': if (peekOctalDigit()) { return createSkipTemplateCharactersResult("Invalid escape sequence"); } return null; case '1': case '2': case '3': case '4': case '5': case '6': case '7': return createSkipTemplateCharactersResult("Invalid escape sequence"); case 'x': boolean doubleHexDigit = skipHexDigit() && skipHexDigit(); if (!doubleHexDigit) { return createSkipTemplateCharactersResult("Hex digit expected"); } return null; case 'u': if (peek('{')) { nextChar(); if (peek('}')) { return createSkipTemplateCharactersResult("Empty unicode escape"); } boolean allHexDigits = true; while (!peek('}') && allHexDigits) { allHexDigits = allHexDigits && skipHexDigit(); } if (!allHexDigits) { return createSkipTemplateCharactersResult("Hex digit expected"); } nextChar(); return null; } else { boolean quadHexDigit = skipHexDigit() && skipHexDigit() && skipHexDigit() && skipHexDigit(); if (!quadHexDigit) { return createSkipTemplateCharactersResult("Hex digit expected"); } return null; } default: return null; } } @SuppressWarnings("IdentityBinaryExpression") // for "skipHexDigit() && skipHexDigit()" private boolean skipStringLiteralEscapeSequence() { nextChar(); if (isAtEnd()) { reportError("Unterminated string literal escape sequence"); return false; } if (isStringLineTerminator(peekChar())) { skipLineTerminator(); return true; } char next = nextChar(); switch (next) { case '\'': case '"': case '`': case '\\': case 'b': case 'f': case 'n': case 'r': case 't': case 'v': case '0': return true; case '1': case '2': case '3': case '4': case '5': case '6': case '7': break; case 'x': boolean doubleHexDigit = skipHexDigit() && skipHexDigit(); if (!doubleHexDigit) { reportError("Hex digit expected"); } return doubleHexDigit; case 'u': if (peek('{')) { nextChar(); if (peek('}')) { reportError("Empty unicode escape"); return false; } boolean allHexDigits = true; while (!peek('}') && allHexDigits) { allHexDigits = allHexDigits && skipHexDigit(); } if (!allHexDigits) { reportError("Hex digit expected"); } nextChar(); return allHexDigits; } else { boolean quadHexDigit = skipHexDigit() && skipHexDigit() && skipHexDigit() && skipHexDigit(); if (!quadHexDigit) { reportError("Hex digit expected"); } return quadHexDigit; } default: break; } if (next == '/') { // Don't warn for '\/' (for now) since it's common in "<\/script>" } else { reportWarning("Unnecessary escape: '\\%s' is equivalent to just '%s'", next, next); } return true; } private boolean skipHexDigit() { if (!peekHexDigit()) { return false; } nextChar(); return true; } private void skipLineTerminator() { char first = nextChar(); if (first == '\r' && peek('\n')) { nextChar(); } } private LiteralToken scanFractionalNumericLiteral(int beginToken) { if (peek('.')) { nextChar(); skipDecimalDigits(); } return scanExponentOfNumericLiteral(beginToken); } private LiteralToken scanExponentOfNumericLiteral(int beginToken) { switch (peekChar()) { case 'e': case 'E': nextChar(); switch (peekChar()) { case '+': case '-': nextChar(); break; default: // fall out } if (!isDecimalDigit(peekChar())) { reportError("Exponent part must contain at least one digit"); } skipDecimalDigits(); break; default: break; } return new LiteralToken( TokenType.NUMBER, getTokenString(beginToken), getTokenRange(beginToken)); } private void skipDecimalDigits() { char ch = peekChar(); while (isDecimalDigit(ch) || ch == '_') { nextChar(); if (ch == '_') { if (isDecimalDigit(peekChar())) { nextChar(); } else { reportError("Trailing numeric separator"); } } ch = peekChar(); } } private static boolean isDecimalDigit(char ch) { switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return true; default: return false; } } private boolean peekHexDigit() { return isHexDigit(peekChar()); } private static boolean isHexDigit(char ch) { return Character.digit(ch, 0x10) >= 0; } private void skipHexDigits() { char ch = peekChar(); while (isHexDigit(ch) || ch == '_') { nextChar(); if (ch == '_') { if (peekHexDigit()) { nextChar(); } else { reportError("Trailing numeric separator"); } } ch = peekChar(); } } private boolean peekOctalDigit() { return isOctalDigit(peekChar()); } private void skipOctalDigits() { char ch = peekChar(); while (isOctalDigit(ch) || ch == '_') { nextChar(); if (ch == '_') { if (isOctalDigit(peekChar())) { nextChar(); } else { reportError("Trailing numeric separator"); } } ch = peekChar(); } } private static boolean isOctalDigit(char ch) { return valueOfOctalDigit(ch) >= 0; } private static int valueOfOctalDigit(char ch) { switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': return ch - '0'; default: return -1; } } private void skipBinaryDigits() { char ch = peekChar(); while (isBinaryDigit(ch) || ch == '_') { nextChar(); if (ch == '_') { if (isBinaryDigit(peekChar())) { nextChar(); } else { reportError("Trailing numeric separator"); } } ch = peekChar(); } } private static boolean isBinaryDigit(char ch) { return valueOfBinaryDigit(ch) >= 0; } private static int valueOfBinaryDigit(char ch) { switch (ch) { case '0': return 0; case '1': return 1; default: return -1; } } private char nextChar() { if (isAtEnd()) { return '\0'; } return contents.charAt(index++); } private boolean peek(char ch) { return peekChar() == ch; } private char peekChar() { return peekChar(0); } private char peekChar(int offset) { return !isValidIndex(index + offset) ? '\0' : contents.charAt(index + offset); } @FormatMethod private void reportError(@FormatString String format, Object... arguments) { reportError(getPosition(), format, arguments); } @FormatMethod private void reportError( SourcePosition position, @FormatString String format, Object... arguments) { errorReporter.reportError(position, format, arguments); } @FormatMethod private void reportWarning(@FormatString String format, Object... arguments) { errorReporter.reportWarning(getPosition(), format, arguments); } void incTypeParameterLevel() { typeParameterLevel++; } void decTypeParameterLevel() { typeParameterLevel--; } private SkipTemplateCharactersResult createSkipTemplateCharactersResult(String message) { return new SkipTemplateCharactersResult(message, getPosition()); } private static class SkipTemplateCharactersResult { @Nullable private final String errorMessage; private final SourcePosition position; SkipTemplateCharactersResult(String message, SourcePosition position) { this.errorMessage = message; this.position = position; } String getErrorMessage() { return this.errorMessage; } SourcePosition getPosition() { return this.position; } boolean hasError() { return this.errorMessage != null; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy