// Source listing of eu.cqse.check.framework.shallowparser.util.VariableNameFragmentParser
// (artifact: teamscale-check-api). Repository-browser navigation text was removed here.
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.cqse.check.framework.shallowparser.util;
import static eu.cqse.check.framework.scanner.ETokenType.ARRAY_SEPARATOR;
import static eu.cqse.check.framework.scanner.ETokenType.COLON;
import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
import static eu.cqse.check.framework.scanner.ETokenType.CONST;
import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE_ARROW;
import static eu.cqse.check.framework.scanner.ETokenType.EOL;
import static eu.cqse.check.framework.scanner.ETokenType.EQ;
import static eu.cqse.check.framework.scanner.ETokenType.EQUAL;
import static eu.cqse.check.framework.scanner.ETokenType.GT;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.LT;
import static eu.cqse.check.framework.scanner.ETokenType.QUESTION;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
import java.util.EnumSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import com.google.common.collect.ImmutableSet;
import eu.cqse.check.framework.scanner.ELanguage;
import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.languages.cs.CsShallowParser;
import eu.cqse.check.framework.shallowparser.languages.java.JavaShallowParser;
import eu.cqse.check.framework.shallowparser.languages.javascript.JavaScriptShallowParser;
import eu.cqse.check.framework.util.CppLanguageFeatureParser;
/**
 * Parser for fragments of a variable declaration such as "int a;", "int b,",
 * "PairList l=b;", "a?:String=''".
 * <p>
 * An instance carries mutable scan state (nesting depth and the last tokens seen) and is created
 * once per call of {@link #extract(List, boolean)}; it is not meant to be reused.
 */
class VariableNameFragmentParser {

    /**
     * EnumSet containing every language in which it is possible that the type info of a variable or
     * parameter might be after a colon.
     *
     * E.g. test(param1 : String)
     */
    private static final EnumSet<ELanguage> LANGUAGES_TYPE_INFO_AFTER_COLON = EnumSet.of(ELanguage.JAVASCRIPT,
            ELanguage.GOSU, ELanguage.KOTLIN, ELanguage.SWIFT);

    /**
     * A set containing C# contextual keywords. A contextual keyword is used to provide a specific
     * meaning in the code, but it is not a reserved word in C#. Thus, one could name a local variable
     * or method using a contextual keyword.
     */
    private static final Set<ETokenType> CONTEXTUAL_KEYWORDS = ImmutableSet.of(ETokenType.ADD, ETokenType.ALIAS,
            ETokenType.ASCENDING, ETokenType.ASYNC, ETokenType.AWAIT, ETokenType.DESCENDING, ETokenType.DYNAMIC,
            ETokenType.FROM, ETokenType.GET, ETokenType.GLOBAL, ETokenType.GROUP, ETokenType.INTO, ETokenType.JOIN,
            ETokenType.LET, ETokenType.ORDERBY, ETokenType.PARTIAL, ETokenType.REMOVE, ETokenType.SELECT,
            ETokenType.SET, ETokenType.VALUE, ETokenType.VAR, ETokenType.WHEN, ETokenType.WHERE, ETokenType.YIELD);

    /** Token types that increase the nesting depth. */
    private static final EnumSet<ETokenType> OPENING_TOKENS = EnumSet.of(LBRACK, LT, LPAREN, LBRACE);

    /** Token types that decrease the nesting depth. */
    private static final EnumSet<ETokenType> CLOSING_TOKENS = EnumSet.of(RBRACK, GT, RPAREN, RBRACE);

    /**
     * Token types that typically indicate that the previous token was a variable name.
     */
    private static final EnumSet<ETokenType> VARIABLE_NAME_SUCCESSOR_TYPES = EnumSet.of(COMMA,
            // the commas in matlab methods are parsed as array separators
            ARRAY_SEPARATOR, EQ, EOL, EQUAL, SEMICOLON);

    /** The tokens of the declaration fragment that is scanned. */
    private final List<IToken> tokens;

    /**
     * Flag passed in by the caller. It enables the reset after a type-info colon in
     * {@link #updatePreviousToken(IToken)} and the C/C++ check in
     * {@link #isCppParameterType(IToken, ETokenType)}.
     */
    private final boolean ignoreParameterTypes;

    /** Current nesting depth; incremented on opening tokens, decremented on closing tokens. */
    private int parenthesisNesting = 0;

    /** The last token recorded at nesting depth 0, or null if there is none (yet). */
    private IToken previousToken = null;

    /** The type of the token that was recorded before {@link #previousToken}, or null. */
    private ETokenType beforePreviousType = null;

    private VariableNameFragmentParser(List<IToken> declarationFragment, boolean ignoreParameterTypes) {
        tokens = declarationFragment;
        this.ignoreParameterTypes = ignoreParameterTypes;
    }

    /**
     * Extracts the token that defines the name of the variable that is declared in the given
     * declaration fragment. Such a fragment might be "int a;", "int b,", "PairList
     * l=b;", "a?:String=''"
     *
     * @param declarationFragment
     *            the tokens of the fragment to scan
     * @param ignoreParameterTypes
     *            see {@link #ignoreParameterTypes}
     * @return the name token, or an empty optional if no variable name was recognized
     */
    public static Optional<IToken> extract(List<IToken> declarationFragment, boolean ignoreParameterTypes) {
        return new VariableNameFragmentParser(declarationFragment, ignoreParameterTypes).extractVariable();
    }

    /**
     * Scans the tokens from left to right and returns the recorded previous token as soon as a token
     * follows that typically comes after a variable name (or a type-info colon), provided the
     * previous token qualifies as a non-nested variable name. If the scan ends without such a hit,
     * the last recorded token is checked once more.
     */
    private Optional<IToken> extractVariable() {
        for (IToken token : tokens) {
            if (handleParenthesis(token)) {
                continue;
            }

            if (token.getType() == QUESTION) {
                // skip question mark (optional parameter token) between parameter name and
                // colon (TypeScript). The question mark itself is never recorded; instead the
                // current previous token is passed through updatePreviousToken again.
                updatePreviousToken(previousToken);
                continue;
            }

            // In TypeScript you can add type information after colon
            boolean startOfTypeInfo = token.getType() == COLON
                    && LANGUAGES_TYPE_INFO_AFTER_COLON.contains(token.getLanguage());
            if ((startOfTypeInfo || VARIABLE_NAME_SUCCESSOR_TYPES.contains(token.getType())) && isNonNestedVariable()) {
                return Optional.of(previousToken);
            }

            // In C# variable declarations a double arrow (=>) signifies a so-called
            // "expression-bodied member". This allows to define a field getter directly
            // when declaring a field.
            // E.g., public String LoggingProvider => nameof(Framework).
            // Once we encounter the double arrow token, there's no need to further search
            // for variable names in this segment.
            if (token.getLanguage() == ELanguage.CS && token.getType() == DOUBLE_ARROW) {
                break;
            }

            updatePreviousToken(token);
        }

        // The fragment ended (or was cut short by "=>") without an explicit successor token.
        if (isNonNestedVariable()) {
            return Optional.of(previousToken);
        }
        return Optional.empty();
    }

    /**
     * Adjusts the nesting depth for opening and closing tokens.
     *
     * @return true if the token was an opening or closing token and has been consumed here, false
     *         otherwise
     */
    private boolean handleParenthesis(IToken token) {
        if (OPENING_TOKENS.contains(token.getType())) {
            parenthesisNesting += 1;
            // Runs after the increment: updatePreviousToken only records tokens while the
            // nesting depth is 0, so the opening token is recorded only if the depth is 0 here.
            updatePreviousToken(token);
            return true;
        } else if (CLOSING_TOKENS.contains(token.getType())) {
            parenthesisNesting -= 1;
            // do not update previousToken
            return true;
        }
        return false;
    }

    /**
     * Records the given token as the new previous token, but only while the nesting depth is 0. The
     * type of the old previous token becomes {@link #beforePreviousType}.
     */
    private void updatePreviousToken(IToken token) {
        if (parenthesisNesting == 0) {
            if (previousToken != null) {
                // In TypeScript you can add type information after colon. If parameter types are
                // to be ignored, the token following such a colon is dropped and the recorded
                // state is reset.
                if (ignoreParameterTypes && previousToken.getType() == ETokenType.COLON
                        && LANGUAGES_TYPE_INFO_AFTER_COLON.contains(token.getLanguage())) {
                    previousToken = null;
                    beforePreviousType = null;
                    return;
                }
                beforePreviousType = previousToken.getType();
            }
            previousToken = token;
        }
    }

    /** Returns whether we are at nesting depth 0 and the previous token is a variable name. */
    private boolean isNonNestedVariable() {
        return parenthesisNesting == 0 && isVariableName(previousToken, beforePreviousType);
    }

    /** Returns whether the given token is a variable name. A null token never is. */
    private boolean isVariableName(IToken token, ETokenType beforeTokenType) {
        if (token == null || isCppParameterType(token, beforeTokenType)) {
            return false;
        }
        if (isValidIdentifier(token)) {
            return true;
        }
        // Handle contextual keywords that can also act as identifier names
        if (token.getLanguage() == ELanguage.JAVASCRIPT) {
            return JavaScriptShallowParser.ALL_IDENTIFIERS.contains(token.getType());
        } else if (token.getLanguage() == ELanguage.CS) {
            return CONTEXTUAL_KEYWORDS.contains(token.getType());
        }
        return false;
    }

    /**
     * Checks if the given token type is a valid identifier. For C++, C, C# and Java the
     * language-specific identifier sets are consulted first; for every language a token of type
     * {@link ETokenType#IDENTIFIER} is accepted.
     */
    private boolean isValidIdentifier(IToken token) {
        switch (token.getLanguage()) {
        case CPP:
            if (CppLanguageFeatureParser.VALID_IDENTIFIERS_CPP.contains(token.getType())) {
                return true;
            }
            break;
        case C:
            if (CppLanguageFeatureParser.VALID_IDENTIFIERS_C.contains(token.getType())) {
                return true;
            }
            break;
        case CS:
            if (CsShallowParser.VALID_IDENTIFIERS.contains(token.getType())) {
                return true;
            }
            break;
        case JAVA:
            if (JavaShallowParser.VALID_JAVA_IDENTIFIERS.contains(token.getType())) {
                return true;
            }
            break;
        default:
            break;
        }
        return token.getType() == ETokenType.IDENTIFIER;
    }

    /**
     * Returns whether the given {@code token} represents the type of a C++ parameter. If true, ignores
     * a missing parameter name and detects namespaced variables like
     * const int C1::CONST2 = 17
     * <p>
     * This only applies to C/C++ tokens while {@link #ignoreParameterTypes} is set, and only if the
     * token before it was a comma, a scope token, {@code const}, or absent.
     */
    private boolean isCppParameterType(IToken token, ETokenType beforeTokenType) {
        return token.getLanguage().isCppOrC() && ignoreParameterTypes && (beforeTokenType == ETokenType.COMMA
                || beforeTokenType == null || beforeTokenType == ETokenType.SCOPE || beforeTokenType == CONST);
    }
}