org.netbeans.lib.java.lexer.JavaLexer Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.netbeans.lib.java.lexer;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Supplier;
import org.netbeans.api.java.lexer.JavaTokenId;
import org.netbeans.api.lexer.PartType;
import org.netbeans.api.lexer.Token;
import org.netbeans.spi.lexer.Lexer;
import org.netbeans.spi.lexer.LexerInput;
import org.netbeans.spi.lexer.LexerRestartInfo;
import org.netbeans.spi.lexer.TokenFactory;
/**
* Lexical analyzer for java language.
*
* It recognizes "version" attribute and expects java.lang.Integer
* value for it. The default value is Integer.valueOf(5). The lexer changes
* its behavior in the following way:
*
* - Integer.valueOf(4) - "assert" recognized as keyword (not identifier)
*
- Integer.valueOf(5) - "enum" recognized as keyword (not identifier)
*
*
* @author Miloslav Metelka
* @version 1.00
*/
//XXX: be carefull about flyweight tokens - needs to check if the inputX.readLength() matches the image!
public class JavaLexer implements Lexer {
private static final int EOF = LexerInput.EOF;
private final LexerInput input;
private final TokenFactory tokenFactory;
private final int version;
private Integer state = null;
public JavaLexer(LexerRestartInfo info) {
this.input = info.input();
this.tokenFactory = info.tokenFactory();
if (info.state() instanceof ComplexState) {
ComplexState complex = (ComplexState) info.state();
this.pendingStringLiteral = complex.pendingStringLiteral;
this.pendingBraces = complex.pendingBraces;
this.literalHistory = complex.literalHistory;
this.state = complex.state;
} else {
this.state = (Integer) info.state();
}
if (state == null) {
Supplier fileName = (Supplier)info.getAttributeValue("fileName"); //NOI18N
if (fileName != null && "module-info.java".equals(fileName.get())) { //NOI18N
state = 1; // parsing module info
}
}
Integer ver = null;
Object verAttribute = info.getAttributeValue("version"); //NOI18N
if (verAttribute instanceof Supplier) {
Object val = ((Supplier) verAttribute).get();
if (val instanceof String) {
ver = getVersionAsInt(((Supplier) (verAttribute)).get());
}
} else if (verAttribute instanceof Integer) {
ver = (Integer) verAttribute;
}
this.version = (ver != null) ? ver.intValue() : 10; // TODO: Java 1.8 used by default
}
private static final class ComplexState {
public final JavaTokenId pendingStringLiteral;
public final int pendingBraces;
public final LiteralHistoryNode literalHistory;
public final Integer state;
public ComplexState(JavaTokenId pendingStringLiteral, int pendingBraces,
LiteralHistoryNode literalHistory, Integer state) {
this.pendingStringLiteral = pendingStringLiteral;
this.pendingBraces = pendingBraces;
this.literalHistory = literalHistory;
this.state = state;
}
@Override
public int hashCode() {
int hash = 7;
hash = 97 * hash + Objects.hashCode(this.pendingStringLiteral);
hash = 97 * hash + this.pendingBraces;
hash = 97 * hash + Objects.hashCode(this.literalHistory);
hash = 97 * hash + Objects.hashCode(this.state);
return hash;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
final ComplexState other = (ComplexState) obj;
if (this.pendingBraces != other.pendingBraces) {
return false;
}
if (this.pendingStringLiteral != other.pendingStringLiteral) {
return false;
}
if (!Objects.equals(this.literalHistory, other.literalHistory)) {
return false;
}
return Objects.equals(this.state, other.state);
}
}
public Object state() {
if (pendingStringLiteral != null) {
return new ComplexState(pendingStringLiteral, 0, literalHistory, state);
}
return state;
}
int previousLength = -1;
int currentLength = -1;
public int nextChar() {
previousLength = currentLength;
int backupReadLength = input.readLength();
int c = input.read();
if (c != '\\') {
currentLength = 1;
return c;
}
boolean wasU = false;
int first;
while ((first = input.read()) == 'u')
wasU = true;
if (!wasU) {
input.backup(input.readLengthEOF()- backupReadLength);
currentLength = 1;
return input.read();
}
int second = input.read();
int third = input.read();
int fourth = input.read();
if (fourth == LexerInput.EOF) {
//TODO: broken unicode
input.backup(input.readLengthEOF()- backupReadLength);
currentLength = 1;
return input.read();
}
first = Character.digit(first, 16);
second = Character.digit(second, 16);
third = Character.digit(third, 16);
fourth = Character.digit(fourth, 16);
if (first == (-1) || second == (-1) || third == (-1) || fourth == (-1)) {
//TODO: broken unicode
input.backup(input.readLengthEOF()- backupReadLength);
currentLength = 1;
return input.read();
}
currentLength = input.readLength() - backupReadLength;
return ((first * 16 + second) * 16 + third) * 16 + fourth;
}
public void backup(int howMany) {
switch (howMany) {
case 1:
assert currentLength != (-1);
input.backup(currentLength);
currentLength = previousLength;
previousLength = (-1);
break;
case 2:
assert currentLength != (-1) && previousLength != (-1);
input.backup(currentLength + previousLength);
currentLength = previousLength = (-1);
break;
default:
assert false : howMany;
}
}
public void consumeNewline() {
if (nextChar() != '\n') backup(1);
}
private JavaTokenId pendingStringLiteral;
private int pendingBraces;
private class LiteralHistoryNode {
public final JavaTokenId pendingStringLiteral;
public final int pendingBraces;
public final LiteralHistoryNode next;
public LiteralHistoryNode(JavaTokenId pendingStringLiteral, int pendingBraces, LiteralHistoryNode next) {
this.pendingStringLiteral = pendingStringLiteral;
this.pendingBraces = pendingBraces;
this.next = next;
}
}
LiteralHistoryNode literalHistory = null;
public Token nextToken() {
boolean stringLiteralContinuation = false;
JavaTokenId lookupId = null;
while(true) {
int c = stringLiteralContinuation ? '"' : nextChar();
switch (c) {
case '#':
//Support for exotic identifiers has been removed 6999438
return token(JavaTokenId.ERROR);
case '"': // string literal
if (lookupId == null) lookupId = JavaTokenId.STRING_LITERAL;
while (true) {
switch (nextChar()) {
case '"': // NOI18N
String text = input.readText().toString();
if (text.length() == 2 && !stringLiteralContinuation) {
int mark = input.readLength();
if (nextChar() != '"') {
input.backup(1); //TODO: EOF???
return token(lookupId);
}
int c2 = nextChar();
while (Character.isWhitespace(c2) && c2 != '\n') {
c2 = nextChar();
}
if (c2 != '\n') {
input.backup(input.readLengthEOF()- mark);
return token(lookupId);
}
lookupId = JavaTokenId.MULTILINE_STRING_LITERAL;
}
if (lookupId == JavaTokenId.MULTILINE_STRING_LITERAL) {
if (text.endsWith("\"\"\"") && !text.endsWith("\\\"\"\"") && (text.length() > 6 || stringLiteralContinuation)) {
return token(lookupId, stringLiteralContinuation ? PartType.END : PartType.COMPLETE);
} else {
break;
}
}
return token(lookupId, stringLiteralContinuation ? PartType.END : PartType.COMPLETE);
case '\\':
switch (nextChar()) {
case '{':
if (pendingStringLiteral != null) {
literalHistory = new LiteralHistoryNode(pendingStringLiteral, pendingBraces, literalHistory);
}
pendingStringLiteral = lookupId;
pendingBraces = 0;
return token(lookupId, stringLiteralContinuation ? PartType.MIDDLE : PartType.START);
}
break;
case '\r': consumeNewline();
case '\n':
if (lookupId == JavaTokenId.MULTILINE_STRING_LITERAL) {
break;
}
case EOF:
return tokenFactory.createToken(lookupId, //XXX: \n handling for exotic identifiers?
input.readLength(), PartType.START);
}
}
case '\'': // char literal
while (true)
switch (nextChar()) {
case '\'': // NOI18N
return token(JavaTokenId.CHAR_LITERAL);
case '\\':
nextChar(); // read escaped char
break;
case '\r': consumeNewline();
case '\n':
case EOF:
return tokenFactory.createToken(JavaTokenId.CHAR_LITERAL,
input.readLength(), PartType.START);
}
case '/':
switch (nextChar()) {
case '/': // in single-line comment
switch (nextChar()) {
case '/': return finishJavadocLineRun();
case '\r': consumeNewline();
case '\n':
case EOF:
return token(JavaTokenId.LINE_COMMENT);
}
while (true)
switch (nextChar()) {
case '\r': consumeNewline();
case '\n':
case EOF:
return token(JavaTokenId.LINE_COMMENT);
}
case '=': // found /=
return token(JavaTokenId.SLASHEQ);
case '*': // in multi-line or javadoc comment
c = nextChar();
if (c == '*') { // either javadoc comment or empty multi-line comment /**/
c = nextChar();
if (c == '/')
return token(JavaTokenId.BLOCK_COMMENT);
while (true) { // in javadoc comment
while (c == '*') {
c = nextChar();
if (c == '/')
return token(JavaTokenId.JAVADOC_COMMENT);
else if (c == EOF)
return tokenFactory.createToken(JavaTokenId.JAVADOC_COMMENT,
input.readLength(), PartType.START);
}
if (c == EOF)
return tokenFactory.createToken(JavaTokenId.JAVADOC_COMMENT,
input.readLength(), PartType.START);
c = nextChar();
}
} else { // in multi-line comment (and not after '*')
while (true) {
c = nextChar();
while (c == '*') {
c = nextChar();
if (c == '/')
return token(JavaTokenId.BLOCK_COMMENT);
else if (c == EOF)
return tokenFactory.createToken(JavaTokenId.BLOCK_COMMENT,
input.readLength(), PartType.START);
}
if (c == EOF)
return tokenFactory.createToken(JavaTokenId.BLOCK_COMMENT,
input.readLength(), PartType.START);
}
}
} // end of switch()
backup(1);
return token(JavaTokenId.SLASH);
case '=':
if (nextChar() == '=')
return token(JavaTokenId.EQEQ);
backup(1);
return token(JavaTokenId.EQ);
case '>':
switch (nextChar()) {
case '>': // after >>
switch (c = nextChar()) {
case '>': // after >>>
if (nextChar() == '=')
return token(JavaTokenId.GTGTGTEQ);
backup(1);
return token(JavaTokenId.GTGTGT);
case '=': // >>=
return token(JavaTokenId.GTGTEQ);
}
backup(1);
return token(JavaTokenId.GTGT);
case '=': // >=
return token(JavaTokenId.GTEQ);
}
backup(1);
return token(JavaTokenId.GT);
case '<':
switch (nextChar()) {
case '<': // after <<
if (nextChar() == '=')
return token(JavaTokenId.LTLTEQ);
backup(1);
return token(JavaTokenId.LTLT);
case '=': // <=
return token(JavaTokenId.LTEQ);
}
backup(1);
return token(JavaTokenId.LT);
case '+':
switch (nextChar()) {
case '+':
return token(JavaTokenId.PLUSPLUS);
case '=':
return token(JavaTokenId.PLUSEQ);
}
backup(1);
return token(JavaTokenId.PLUS);
case '-':
switch (nextChar()) {
case '-':
return token(JavaTokenId.MINUSMINUS);
case '=':
return token(JavaTokenId.MINUSEQ);
case '>':
return token(JavaTokenId.ARROW);
}
backup(1);
return token(JavaTokenId.MINUS);
case '*':
switch (nextChar()) {
case '/': // invalid comment end - */
return token(JavaTokenId.INVALID_COMMENT_END);
case '=':
return token(JavaTokenId.STAREQ);
}
backup(1);
return token(JavaTokenId.STAR);
case '|':
switch (nextChar()) {
case '|':
return token(JavaTokenId.BARBAR);
case '=':
return token(JavaTokenId.BAREQ);
}
backup(1);
return token(JavaTokenId.BAR);
case '&':
switch (nextChar()) {
case '&':
return token(JavaTokenId.AMPAMP);
case '=':
return token(JavaTokenId.AMPEQ);
}
backup(1);
return token(JavaTokenId.AMP);
case '%':
if (nextChar() == '=')
return token(JavaTokenId.PERCENTEQ);
backup(1);
return token(JavaTokenId.PERCENT);
case '^':
if (nextChar() == '=')
return token(JavaTokenId.CARETEQ);
backup(1);
return token(JavaTokenId.CARET);
case '!':
if (nextChar() == '=')
return token(JavaTokenId.BANGEQ);
backup(1);
return token(JavaTokenId.BANG);
case '.':
if ((c = nextChar()) == '.')
if (nextChar() == '.') { // ellipsis ...
return token(JavaTokenId.ELLIPSIS);
} else
backup(2);
else if ('0' <= c && c <= '9') { // float literal
return finishNumberLiteral(nextChar(), true);
} else
backup(1);
return token(JavaTokenId.DOT);
case '~':
return token(JavaTokenId.TILDE);
case ',':
return token(JavaTokenId.COMMA);
case ';':
if (state != null) {
if (state >= 4 && state < 11) {
state = 3; // inside module decl
} else {
state = 1; // parsing module-info
}
}
return token(JavaTokenId.SEMICOLON);
case ':':
if (nextChar() == ':')
return token(JavaTokenId.COLONCOLON);
backup(1);
return token(JavaTokenId.COLON);
case '?':
return token(JavaTokenId.QUESTION);
case '(':
if (state != null && state >= 12) {
state++;
}
return token(JavaTokenId.LPAREN);
case ')':
if (state != null) {
if (state == 13) {
state = 1;
} else if (state > 13) {
state--;
}
}
return token(JavaTokenId.RPAREN);
case '[':
return token(JavaTokenId.LBRACKET);
case ']':
return token(JavaTokenId.RBRACKET);
case '{':
if (pendingStringLiteral != null ) {
pendingBraces++;
}
if (state != null && state == 2) {
state = 3; // inside module decl
}
return token(JavaTokenId.LBRACE);
case '}':
if (pendingStringLiteral != null && pendingBraces-- == 0) {
lookupId = pendingStringLiteral;
if (literalHistory == null) {
pendingStringLiteral = null;
pendingBraces = 0;
} else {
pendingStringLiteral = literalHistory.pendingStringLiteral;
pendingBraces = literalHistory.pendingBraces;
literalHistory = literalHistory.next;
}
stringLiteralContinuation = true;
break;
}
state = null;
return token(JavaTokenId.RBRACE);
case '@':
if (state != null && state == 1) {
state = 12; // after annotation
}
return token(JavaTokenId.AT);
case '0': // in a number literal
c = nextChar();
if (c == 'x' || c == 'X') { // in hexadecimal (possibly floating-point) literal
boolean inFraction = false;
boolean afterDigit = false;
while (true) {
switch (nextChar()) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
afterDigit = true;
break;
case '.': // hex float literal
if (!inFraction) {
inFraction = true;
afterDigit = false;
} else { // two dots in the float literal
return token(JavaTokenId.FLOAT_LITERAL_INVALID);
}
break;
case 'p': case 'P': // binary exponent
return finishFloatExponent();
case 'l': case 'L':
return token(JavaTokenId.LONG_LITERAL);
case '_':
if (this.version >= 7 && afterDigit) {
int cc = nextChar();
backup(1);
if (cc >= '0' && cc <= '9' || cc >= 'a' && cc <= 'f' || cc >= 'A' && cc <= 'F' || cc == '_') {
break;
}
}
default:
backup(1);
// if float then before mandatory binary exponent => invalid
return token(inFraction ? JavaTokenId.FLOAT_LITERAL_INVALID
: JavaTokenId.INT_LITERAL);
}
} // end of while(true)
} else if (this.version >= 7 && (c == 'b' || c == 'B')) { // in binary literal
boolean afterDigit = false;
while (true) {
switch (nextChar()) {
case '0': case '1':
afterDigit = true;
break;
case 'l': case 'L':
return token(JavaTokenId.LONG_LITERAL);
case '_':
if (afterDigit) {
int cc = nextChar();
backup(1);
if (cc == '0' || cc == '1' || cc == '_') {
break;
}
}
default:
backup(1);
return token(JavaTokenId.INT_LITERAL);
}
}
}
return finishNumberLiteral(c, false);
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return finishNumberLiteral(nextChar(), false);
// Keywords lexing
case 'a':
switch (c = nextChar()) {
case 'b':
if ((c = nextChar()) == 's'
&& (c = nextChar()) == 't'
&& (c = nextChar()) == 'r'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 'c'
&& (c = nextChar()) == 't')
return keywordOrIdentifier(JavaTokenId.ABSTRACT);
break;
case 's':
if ((c = nextChar()) == 's'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 'r'
&& (c = nextChar()) == 't')
return (version >= 4)
? keywordOrIdentifier(JavaTokenId.ASSERT)
: finishIdentifier();
break;
}
return finishIdentifier(c);
case 'b':
switch (c = nextChar()) {
case 'o':
if ((c = nextChar()) == 'o'
&& (c = nextChar()) == 'l'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 'n')
return keywordOrIdentifier(JavaTokenId.BOOLEAN);
break;
case 'r':
if ((c = nextChar()) == 'e'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 'k')
return keywordOrIdentifier(JavaTokenId.BREAK);
break;
case 'y':
if ((c = nextChar()) == 't'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.BYTE);
break;
}
return finishIdentifier(c);
case 'c':
switch (c = nextChar()) {
case 'a':
switch (c = nextChar()) {
case 's':
if ((c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.CASE);
break;
case 't':
if ((c = nextChar()) == 'c'
&& (c = nextChar()) == 'h')
return keywordOrIdentifier(JavaTokenId.CATCH);
break;
}
break;
case 'h':
if ((c = nextChar()) == 'a'
&& (c = nextChar()) == 'r')
return keywordOrIdentifier(JavaTokenId.CHAR);
break;
case 'l':
if ((c = nextChar()) == 'a'
&& (c = nextChar()) == 's'
&& (c = nextChar()) == 's')
return keywordOrIdentifier(JavaTokenId.CLASS);
break;
case 'o':
if ((c = nextChar()) == 'n') {
switch (c = nextChar()) {
case 's':
if ((c = nextChar()) == 't')
return keywordOrIdentifier(JavaTokenId.CONST);
break;
case 't':
if ((c = nextChar()) == 'i'
&& (c = nextChar()) == 'n'
&& (c = nextChar()) == 'u'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.CONTINUE);
break;
}
}
break;
}
return finishIdentifier(c);
case 'd':
switch (c = nextChar()) {
case 'e':
if ((c = nextChar()) == 'f'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 'u'
&& (c = nextChar()) == 'l'
&& (c = nextChar()) == 't')
return keywordOrIdentifier(JavaTokenId.DEFAULT);
break;
case 'o':
switch (c = nextChar()) {
case 'u':
if ((c = nextChar()) == 'b'
&& (c = nextChar()) == 'l'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.DOUBLE);
break;
default:
return keywordOrIdentifier(JavaTokenId.DO, c);
}
break;
}
return finishIdentifier(c);
case 'e':
switch (c = nextChar()) {
case 'l':
if ((c = nextChar()) == 's'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.ELSE);
break;
case 'n':
if ((c = nextChar()) == 'u'
&& (c = nextChar()) == 'm')
return (version >= 5)
? keywordOrIdentifier(JavaTokenId.ENUM)
: finishIdentifier();
break;
case 'x':
switch (c = nextChar()) {
case 'p':
if ((c = nextChar()) == 'o'
&& (c = nextChar()) == 'r'
&& (c = nextChar()) == 't'
&& (c = nextChar()) == 's'
&& state != null && state == 3) {
Token kwOrId = keywordOrIdentifier(JavaTokenId.EXPORTS);
if (kwOrId.id() == JavaTokenId.EXPORTS) {
state = 5; // after exports
}
return kwOrId;
}
break;
case 't':
if ((c = nextChar()) == 'e'
&& (c = nextChar()) == 'n'
&& (c = nextChar()) == 'd'
&& (c = nextChar()) == 's')
return keywordOrIdentifier(JavaTokenId.EXTENDS);
break;
}
break;
}
return finishIdentifier(c);
case 'f':
switch (c = nextChar()) {
case 'a':
if ((c = nextChar()) == 'l'
&& (c = nextChar()) == 's'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.FALSE);
break;
case 'i':
if ((c = nextChar()) == 'n'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 'l')
switch (c = nextChar()) {
case 'l':
if ((c = nextChar()) == 'y')
return keywordOrIdentifier(JavaTokenId.FINALLY);
break;
default:
return keywordOrIdentifier(JavaTokenId.FINAL, c);
}
break;
case 'l':
if ((c = nextChar()) == 'o'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 't')
return keywordOrIdentifier(JavaTokenId.FLOAT);
break;
case 'o':
if ((c = nextChar()) == 'r')
return keywordOrIdentifier(JavaTokenId.FOR);
break;
}
return finishIdentifier(c);
case 'g':
if ((c = nextChar()) == 'o'
&& (c = nextChar()) == 't'
&& (c = nextChar()) == 'o')
return keywordOrIdentifier(JavaTokenId.GOTO);
return finishIdentifier(c);
case 'i':
switch (c = nextChar()) {
case 'f':
return keywordOrIdentifier(JavaTokenId.IF);
case 'm':
if ((c = nextChar()) == 'p') {
switch (c = nextChar()) {
case 'l':
if ((c = nextChar()) == 'e'
&& (c = nextChar()) == 'm'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 'n'
&& (c = nextChar()) == 't'
&& (c = nextChar()) == 's')
return keywordOrIdentifier(JavaTokenId.IMPLEMENTS);
break;
case 'o':
if ((c = nextChar()) == 'r'
&& (c = nextChar()) == 't') {
if (state != null && state == 1) {
state = 11; // after import
}
return keywordOrIdentifier(JavaTokenId.IMPORT);
}
break;
}
}
break;
case 'n':
switch (c = nextChar()) {
case 's':
if ((c = nextChar()) == 't'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 'n'
&& (c = nextChar()) == 'c'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 'o'
&& (c = nextChar()) == 'f')
return keywordOrIdentifier(JavaTokenId.INSTANCEOF);
break;
case 't':
switch (c = nextChar()) {
case 'e':
if ((c = nextChar()) == 'r'
&& (c = nextChar()) == 'f'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 'c'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.INTERFACE);
break;
default:
return keywordOrIdentifier(JavaTokenId.INT, c);
}
break;
}
break;
}
return finishIdentifier(c);
case 'l':
if ((c = nextChar()) == 'o'
&& (c = nextChar()) == 'n'
&& (c = nextChar()) == 'g')
return keywordOrIdentifier(JavaTokenId.LONG);
return finishIdentifier(c);
case 'm':
if ((c = nextChar()) == 'o'
&& (c = nextChar()) == 'd'
&& (c = nextChar()) == 'u'
&& (c = nextChar()) == 'l'
&& (c = nextChar()) == 'e'
&& state != null && state == 1) {
Token kwOrId = keywordOrIdentifier(JavaTokenId.MODULE);
if (kwOrId.id() == JavaTokenId.MODULE) {
state = 2; // after module
}
return kwOrId;
}
return finishIdentifier(c);
case 'n':
switch (c = nextChar()) {
case 'a':
if ((c = nextChar()) == 't'
&& (c = nextChar()) == 'i'
&& (c = nextChar()) == 'v'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.NATIVE);
break;
case 'e':
if ((c = nextChar()) == 'w')
return keywordOrIdentifier(JavaTokenId.NEW);
break;
case 'u':
if ((c = nextChar()) == 'l'
&& (c = nextChar()) == 'l')
return keywordOrIdentifier(JavaTokenId.NULL);
break;
}
return finishIdentifier(c);
case 'o':
if ((c = nextChar()) == 'p'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 'n'
&& state != null && state >= 1)
switch (c = nextChar()) {
case 's':
if (state == 3) {
Token kwOrId = keywordOrIdentifier(JavaTokenId.OPENS);
if (kwOrId.id() == JavaTokenId.OPENS) {
state = 6; // after opens
}
return kwOrId;
}
break;
default:
if (state == 1) {
return keywordOrIdentifier(JavaTokenId.OPEN, c);
}
}
return finishIdentifier(c);
case 'p':
switch (c = nextChar()) {
case 'a':
if ((c = nextChar()) == 'c'
&& (c = nextChar()) == 'k'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 'g'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.PACKAGE);
break;
case 'r':
switch (c = nextChar()) {
case 'i':
if ((c = nextChar()) == 'v'
&& (c = nextChar()) == 'a'
&& (c = nextChar()) == 't'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.PRIVATE);
break;
case 'o':
switch (c = nextChar()) {
case 't':
if ((c = nextChar()) == 'e'
&& (c = nextChar()) == 'c'
&& (c = nextChar()) == 't'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 'd')
return keywordOrIdentifier(JavaTokenId.PROTECTED);
break;
case 'v':
if ((c = nextChar()) == 'i'
&& (c = nextChar()) == 'd'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 's'
&& state != null && state == 3) {
Token kwOrId = keywordOrIdentifier(JavaTokenId.PROVIDES);
if (kwOrId.id() == JavaTokenId.PROVIDES) {
state = 8; // after provides
}
return kwOrId;
}
break;
}
break;
}
break;
case 'u':
if ((c = nextChar()) == 'b'
&& (c = nextChar()) == 'l'
&& (c = nextChar()) == 'i'
&& (c = nextChar()) == 'c')
return keywordOrIdentifier(JavaTokenId.PUBLIC);
break;
}
return finishIdentifier(c);
case 'r':
if ((c = nextChar()) == 'e') {
switch (c = nextChar()) {
case 'q':
if ((c = nextChar()) == 'u'
&& (c = nextChar()) == 'i'
&& (c = nextChar()) == 'r'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 's'
&& state != null && state == 3) {
Token kwOrId = keywordOrIdentifier(JavaTokenId.REQUIRES);
if (kwOrId.id() == JavaTokenId.REQUIRES) {
state = 4; // after requires
}
return kwOrId;
}
break;
case 't':
if ((c = nextChar()) == 'u'
&& (c = nextChar()) == 'r'
&& (c = nextChar()) == 'n')
return keywordOrIdentifier(JavaTokenId.RETURN);
break;
}
}
return finishIdentifier(c);
case 's':
switch (c = nextChar()) {
case 'h':
if ((c = nextChar()) == 'o'
&& (c = nextChar()) == 'r'
&& (c = nextChar()) == 't')
return keywordOrIdentifier(JavaTokenId.SHORT);
break;
case 't':
switch (c = nextChar()) {
case 'a':
if ((c = nextChar()) == 't'
&& (c = nextChar()) == 'i'
&& (c = nextChar()) == 'c')
return keywordOrIdentifier(JavaTokenId.STATIC);
break;
case 'r':
if ((c = nextChar()) == 'i'
&& (c = nextChar()) == 'c'
&& (c = nextChar()) == 't'
&& (c = nextChar()) == 'f'
&& (c = nextChar()) == 'p')
return keywordOrIdentifier(JavaTokenId.STRICTFP);
break;
}
break;
case 'u':
if ((c = nextChar()) == 'p'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 'r')
return keywordOrIdentifier(JavaTokenId.SUPER);
break;
case 'w':
if ((c = nextChar()) == 'i'
&& (c = nextChar()) == 't'
&& (c = nextChar()) == 'c'
&& (c = nextChar()) == 'h')
return keywordOrIdentifier(JavaTokenId.SWITCH);
break;
case 'y':
if ((c = nextChar()) == 'n'
&& (c = nextChar()) == 'c'
&& (c = nextChar()) == 'h'
&& (c = nextChar()) == 'r'
&& (c = nextChar()) == 'o'
&& (c = nextChar()) == 'n'
&& (c = nextChar()) == 'i'
&& (c = nextChar()) == 'z'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 'd')
return keywordOrIdentifier(JavaTokenId.SYNCHRONIZED);
break;
}
return finishIdentifier(c);
case 't':
switch (c = nextChar()) {
case 'h':
switch (c = nextChar()) {
case 'i':
if ((c = nextChar()) == 's')
return keywordOrIdentifier(JavaTokenId.THIS);
break;
case 'r':
if ((c = nextChar()) == 'o'
&& (c = nextChar()) == 'w')
switch (c = nextChar()) {
case 's':
return keywordOrIdentifier(JavaTokenId.THROWS);
default:
return keywordOrIdentifier(JavaTokenId.THROW, c);
}
break;
}
break;
case 'o':
if (state != null && (state == 5 || state == 6)) {
Token kwOrId = keywordOrIdentifier(JavaTokenId.TO);
if (kwOrId.id() == JavaTokenId.TO) {
state = 9; // after to
}
return kwOrId;
}
break;
case 'r':
switch (c = nextChar()) {
case 'a':
if ((c = nextChar()) == 'n'
&& (c = nextChar()) == 's'
&& (c = nextChar()) == 'i') {
switch (c = nextChar()) {
case 'e':
if ((c = nextChar()) == 'n'
&& (c = nextChar()) == 't')
return keywordOrIdentifier(JavaTokenId.TRANSIENT);
break;
case 't':
if ((c = nextChar()) == 'i'
&& (c = nextChar()) == 'v'
&& (c = nextChar()) == 'e'
&& state != null && state == 4)
return keywordOrIdentifier(JavaTokenId.TRANSITIVE);
break;
}
}
break;
case 'u':
if ((c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.TRUE);
break;
case 'y':
return keywordOrIdentifier(JavaTokenId.TRY);
}
break;
}
return finishIdentifier(c);
case 'u':
if ((c = nextChar()) == 's'
&& (c = nextChar()) == 'e'
&& (c = nextChar()) == 's'
&& state != null && state == 3) {
Token kwOrId = keywordOrIdentifier(JavaTokenId.USES);
if (kwOrId.id() == JavaTokenId.USES) {
state = 7; // after uses
}
return kwOrId;
}
return finishIdentifier(c);
case 'v':
switch ((c = nextChar())) {
case 'a':
if ((c = nextChar()) == 'r') {
c = nextChar();
// Check whether the given char is non-ident and if so then return keyword
if (c != EOF && !Character.isJavaIdentifierPart(c = translateSurrogates(c)) &&
version >= 10) {
// For surrogate 2 chars must be backed up
backup((c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) ? 2 : 1);
int len = input.readLength();
Token next = nextToken();
boolean varKeyword = false;
if (AFTER_VAR_TOKENS.contains(next.id())) {
do {
next = nextToken();
} while (next != null && AFTER_VAR_TOKENS.contains(next.id()));
varKeyword = next != null && next.id() == JavaTokenId.IDENTIFIER;
}
input.backup(input.readLengthEOF()- len);
assert input.readLength() == len;
if (varKeyword) {
return token(JavaTokenId.VAR);
}
} else {
// For surrogate 2 chars must be backed up
backup((c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) ? 2 : 1);
}
}
c = nextChar();
break;
case 'o':
switch (c = nextChar()) {
case 'i':
if ((c = nextChar()) == 'd')
return keywordOrIdentifier(JavaTokenId.VOID);
break;
case 'l':
if ((c = nextChar()) == 'a'
&& (c = nextChar()) == 't'
&& (c = nextChar()) == 'i'
&& (c = nextChar()) == 'l'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.VOLATILE);
break;
}
break;
}
return finishIdentifier(c);
case 'w':
switch (c = nextChar()) {
case 'h':
if ((c = nextChar()) == 'i'
&& (c = nextChar()) == 'l'
&& (c = nextChar()) == 'e')
return keywordOrIdentifier(JavaTokenId.WHILE);
break;
case 'i':
if ((c = nextChar()) == 't'
&& (c = nextChar()) == 'h'
&& state != null && state == 8) {
Token kwOrId = keywordOrIdentifier(JavaTokenId.WITH);
if (kwOrId.id() == JavaTokenId.WITH) {
state = 10; // after with
}
return kwOrId;
}
break;
}
return finishIdentifier(c);
// Rest of lowercase letters starting identifiers
case 'h': case 'j': case 'k':
case 'q': case 'x': case 'y': case 'z':
// Uppercase letters starting identifiers
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
case '$':
return finishIdentifier();
case '_':
if (this.version >= 9)
return keywordOrIdentifier(JavaTokenId.UNDERSCORE);
return finishIdentifier();
// All Character.isWhitespace(c) below 0x80 follow
// ['\t' - '\r'] and [0x1c - ' ']
case '\t':
case '\n':
case 0x0b:
case '\f':
case '\r':
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
if (state != null && state >= 12) {
state = 1;
}
return finishWhitespace();
case ' ':
c = nextChar();
if (c == EOF || !Character.isWhitespace(c)) { // Return single space as flyweight token
backup(1);
return input.readLength() == 1
? tokenFactory.getFlyweightToken(JavaTokenId.WHITESPACE, " ")
: tokenFactory.createToken(JavaTokenId.WHITESPACE);
}
return finishWhitespace();
case EOF:
return null;
default:
if (c >= 0x80) { // lowSurr ones already handled above
c = translateSurrogates(c);
if (Character.isJavaIdentifierStart(c))
return finishIdentifier();
if (Character.isWhitespace(c))
return finishWhitespace();
}
// Invalid char
return token(JavaTokenId.ERROR);
} // end of switch (c)
} // end of while(true)
}
private int translateSurrogates(int c) {
if (Character.isHighSurrogate((char)c)) {
int lowSurr = nextChar();
if (lowSurr != EOF && Character.isLowSurrogate((char)lowSurr)) {
// c and lowSurr form the integer unicode char.
c = Character.toCodePoint((char)c, (char)lowSurr);
} else {
// Otherwise it's error: Low surrogate does not follow the high one.
// Leave the original character unchanged.
// As the surrogates do not belong to any
// specific unicode category the lexer should finally
// categorize them as a lexical error.
backup(1);
}
}
return c;
}
private Token finishWhitespace() {
while (true) {
int c = nextChar();
// There should be no surrogates possible for whitespace
// so do not call translateSurrogates()
if (c == EOF || !Character.isWhitespace(c)) {
backup(1);
return tokenFactory.createToken(JavaTokenId.WHITESPACE);
}
}
}
private Token finishIdentifier() {
return finishIdentifier(nextChar());
}
private Token finishIdentifier(int c) {
while (true) {
if (c == EOF || !Character.isJavaIdentifierPart(c = translateSurrogates(c))) {
// For surrogate 2 chars must be backed up
backup((c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) ? 2 : 1);
return tokenFactory.createToken(JavaTokenId.IDENTIFIER);
}
c = nextChar();
}
}
private Token keywordOrIdentifier(JavaTokenId keywordId) {
return keywordOrIdentifier(keywordId, nextChar());
}
private Token keywordOrIdentifier(JavaTokenId keywordId, int c) {
// Check whether the given char is non-ident and if so then return keyword
if (c == EOF || !Character.isJavaIdentifierPart(c = translateSurrogates(c))) {
// For surrogate 2 chars must be backed up
backup((c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) ? 2 : 1);
return token(keywordId);
} else // c is identifier part
return finishIdentifier();
}
private Token finishNumberLiteral(int c, boolean inFraction) {
boolean afterDigit = true;
while (true) {
switch (c) {
case '.':
if (!inFraction) {
inFraction = true;
afterDigit = false;
} else { // two dots in the literal
return token(JavaTokenId.FLOAT_LITERAL_INVALID);
}
break;
case 'l': case 'L': // 0l or 0L
return token(JavaTokenId.LONG_LITERAL);
case 'd': case 'D':
return token(JavaTokenId.DOUBLE_LITERAL);
case 'f': case 'F':
return token(JavaTokenId.FLOAT_LITERAL);
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
afterDigit = true;
break;
case 'e': case 'E': // exponent part
return finishFloatExponent();
case '_':
if (this.version >= 7 && afterDigit) {
int cc = nextChar();
backup(1);
if (cc >= '0' && cc <= '9' || cc == '_') {
break;
}
}
default:
backup(1);
return token(inFraction ? JavaTokenId.DOUBLE_LITERAL
: JavaTokenId.INT_LITERAL);
}
c = nextChar();
}
}
private Token finishFloatExponent() {
int c = nextChar();
if (c == '+' || c == '-') {
c = nextChar();
}
if (c < '0' || '9' < c)
return token(JavaTokenId.FLOAT_LITERAL_INVALID);
do {
c = nextChar();
} while ('0' <= c && c <= '9'); // reading exponent
switch (c) {
case 'd': case 'D':
return token(JavaTokenId.DOUBLE_LITERAL);
case 'f': case 'F':
return token(JavaTokenId.FLOAT_LITERAL);
default:
backup(1);
return token(JavaTokenId.DOUBLE_LITERAL);
}
}
private Token finishJavadocLineRun() {
while (true) {
//finish current line:
LINE: while (true) {
switch (nextChar()) {
case '\r': consumeNewline();
case '\n': break LINE;
case EOF:
return token(JavaTokenId.JAVADOC_COMMENT_LINE_RUN);
}
}
//at the next line, if it starts with "///", include it in the run,
//otherwise finish the run:
int mark = input.readLength();
int c;
while (Character.isWhitespace(c = nextChar()) && c != '\r' && c != '\n' && c != EOF)
;
if (c != '/' || nextChar() != '/' || nextChar() != '/') {
input.backup(input.readLengthEOF()- mark);
return token(JavaTokenId.JAVADOC_COMMENT_LINE_RUN);
}
}
}
private Token token(JavaTokenId id) {
return token(id, PartType.COMPLETE);
}
private Token token(JavaTokenId id, PartType partType) {
String fixedText = id.fixedText();
return (fixedText != null && fixedText.length() == input.readLength() && partType == PartType.COMPLETE)
? tokenFactory.getFlyweightToken(id, fixedText)
: partType == PartType.COMPLETE ? tokenFactory.createToken(id)
: tokenFactory.createToken(id, input.readLength(), partType);
}
private static final Set AFTER_VAR_TOKENS = EnumSet.of(
JavaTokenId.BLOCK_COMMENT, JavaTokenId.JAVADOC_COMMENT,
JavaTokenId.LINE_COMMENT, JavaTokenId.WHITESPACE
);
// Get version as Integer x for version String 1.x
private Integer getVersionAsInt(String version) {
Integer ver = null;
if (version != null) {
try {
// expect format 1.x or x
if (version.startsWith("1.")) { //NOI18N
ver = Integer.parseInt(version.substring(2));
} else {
ver = Integer.parseInt(version);
}
} catch (NumberFormatException e) {
// should not happen if version is
// set using SourceLevelQuery,
// ignore other strings
}
}
return ver;
}
public void release() {
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy