All downloads are free. The search and download features use the official Maven repository.

eu.cqse.check.framework.shallowparser.util.VariableNameFragmentParser Maven / Gradle / Ivy

Go to download

The Teamscale Custom Check API allows users to extend Teamscale by writing custom analyses that create findings.

There is a newer version: 2024.7.2
Show newest version
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package eu.cqse.check.framework.shallowparser.util;

import static eu.cqse.check.framework.scanner.ETokenType.ARRAY_SEPARATOR;
import static eu.cqse.check.framework.scanner.ETokenType.COLON;
import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
import static eu.cqse.check.framework.scanner.ETokenType.CONST;
import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE_ARROW;
import static eu.cqse.check.framework.scanner.ETokenType.EOL;
import static eu.cqse.check.framework.scanner.ETokenType.EQ;
import static eu.cqse.check.framework.scanner.ETokenType.EQUAL;
import static eu.cqse.check.framework.scanner.ETokenType.GT;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.LT;
import static eu.cqse.check.framework.scanner.ETokenType.QUESTION;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;

import java.util.EnumSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;

import com.google.common.collect.ImmutableSet;

import eu.cqse.check.framework.scanner.ELanguage;
import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.languages.cs.CsShallowParser;
import eu.cqse.check.framework.shallowparser.languages.java.JavaShallowParser;
import eu.cqse.check.framework.shallowparser.languages.javascript.JavaScriptShallowParser;
import eu.cqse.check.framework.util.CppLanguageFeatureParser;

/**
 * Parser for fragments of a variable declaration such as "int a;", "int b,",
 * "PairList l=b;", "a?:String=''".
 */
class VariableNameFragmentParser {

	/**
	 * EnumSet containing every language in which it is possible that the type info of a variable or
	 * parameter might be after a colon.
	 * 

* E.g. test(param1 : String) */ private static final EnumSet LANGUAGES_TYPE_INFO_AFTER_COLON = EnumSet.of(ELanguage.JAVASCRIPT, ELanguage.GOSU, ELanguage.KOTLIN, ELanguage.SWIFT); /** * A set containing C# contextual keywords. A contextual keyword is used to provide a specific * meaning in the code, but it is not a reserved word in C#. Thus, one could name a local variable * or method using a contextual keyword. */ private static final Set CONTEXTUAL_KEYWORDS = ImmutableSet.of(ETokenType.ADD, ETokenType.ALIAS, ETokenType.ASCENDING, ETokenType.ASYNC, ETokenType.AWAIT, ETokenType.DESCENDING, ETokenType.DYNAMIC, ETokenType.FROM, ETokenType.GET, ETokenType.GLOBAL, ETokenType.GROUP, ETokenType.INTO, ETokenType.JOIN, ETokenType.LET, ETokenType.ORDERBY, ETokenType.PARTIAL, ETokenType.REMOVE, ETokenType.SELECT, ETokenType.SET, ETokenType.VALUE, ETokenType.VAR, ETokenType.WHEN, ETokenType.WHERE, ETokenType.YIELD); private static final EnumSet OPENING_TOKENS = EnumSet.of(LBRACK, LT, LPAREN, LBRACE); private static final EnumSet CLOSING_TOKENS = EnumSet.of(RBRACK, GT, RPAREN, RBRACE); /** * Token types that typically indicate that the previous token was a variable name. */ private static final EnumSet VARIABLE_NAME_SUCCESSOR_TYPES = EnumSet.of(COMMA, // the commas in matlab methods are parsed as array separators ARRAY_SEPARATOR, EQ, EOL, EQUAL, SEMICOLON); private final List tokens; private final boolean ignoreParameterTypes; private int parenthesisNesting = 0; private IToken previousToken = null; private ETokenType beforePreviousType = null; private VariableNameFragmentParser(List declarationFragment, boolean ignoreParameterTypes) { tokens = declarationFragment; this.ignoreParameterTypes = ignoreParameterTypes; } /** * Extracts the token that defines the name of the variable that is declared in the given * declaration fragment. 
Such a fragment might be "int a;", "int b,", "PairList * l=b;", "a?:String=''" */ public static Optional extract(List declarationFragment, boolean ignoreParameterTypes) { return new VariableNameFragmentParser(declarationFragment, ignoreParameterTypes).extractVariable(); } private Optional extractVariable() { for (IToken token : tokens) { if (handleParenthesis(token)) { continue; } if (token.getType() == QUESTION) { // skip question mark (optional parameter token) between parameter name and // colon (TypeScript) updatePreviousToken(previousToken); continue; } // In TypeScript you can add type information after colon boolean startOfTypeInfo = token.getType() == COLON && LANGUAGES_TYPE_INFO_AFTER_COLON.contains(token.getLanguage()); if ((startOfTypeInfo || VARIABLE_NAME_SUCCESSOR_TYPES.contains(token.getType())) && isNonNestedVariable()) { return Optional.of(previousToken); } // In C# variable declarations a double arrow (=>) signifies a so-called // "expression-bodied member". This allows to define a field getter directly // when declaring a field. // E.g., public String LoggingProvider => nameof(Framework). // Once we encounter the double arrow token, there's no need to further search // for variable names in this segment. 
if (token.getLanguage() == ELanguage.CS && token.getType() == DOUBLE_ARROW) { break; } updatePreviousToken(token); } if (isNonNestedVariable()) { return Optional.of(previousToken); } return Optional.empty(); } private boolean handleParenthesis(IToken token) { if (OPENING_TOKENS.contains(token.getType())) { parenthesisNesting += 1; updatePreviousToken(token); return true; } else if (CLOSING_TOKENS.contains(token.getType())) { parenthesisNesting -= 1; // do not update previousToken return true; } return false; } private void updatePreviousToken(IToken token) { if (parenthesisNesting == 0) { if (previousToken != null) { // In TypeScript you can add type information after colon if (ignoreParameterTypes && previousToken.getType() == ETokenType.COLON && LANGUAGES_TYPE_INFO_AFTER_COLON.contains(token.getLanguage())) { previousToken = null; beforePreviousType = null; return; } beforePreviousType = previousToken.getType(); } previousToken = token; } } private boolean isNonNestedVariable() { return parenthesisNesting == 0 && isVariableName(previousToken, beforePreviousType); } /** Returns whether the given token is a variable name. */ private boolean isVariableName(IToken token, ETokenType beforeTokenType) { if (token == null || isCppParameterType(token, beforeTokenType)) { return false; } if (isValidIdentifier(token)) { return true; } // Handle contextual keywords that can also act as identifier names if (token.getLanguage() == ELanguage.JAVASCRIPT) { return JavaScriptShallowParser.ALL_IDENTIFIERS.contains(token.getType()); } else if (token.getLanguage() == ELanguage.CS) { return CONTEXTUAL_KEYWORDS.contains(token.getType()); } return false; } /** * Checks if the given token type is a valid identifier. 
*/ private boolean isValidIdentifier(IToken token) { switch (token.getLanguage()) { case CPP: if (CppLanguageFeatureParser.VALID_IDENTIFIERS_CPP.contains(token.getType())) { return true; } break; case C: if (CppLanguageFeatureParser.VALID_IDENTIFIERS_C.contains(token.getType())) { return true; } break; case CS: if (CsShallowParser.VALID_IDENTIFIERS.contains(token.getType())) { return true; } break; case JAVA: if (JavaShallowParser.VALID_JAVA_IDENTIFIERS.contains(token.getType())) { return true; } break; default: break; } return token.getType() == ETokenType.IDENTIFIER; } /** * Returns whether the given {@code token} represents the type of a C++ parameter. If true, ignores * a missing parameter name and detects namespaced variables like * const int C1::CONST2 = 17 */ private boolean isCppParameterType(IToken token, ETokenType beforeTokenType) { return token.getLanguage().isCppOrC() && ignoreParameterTypes && (beforeTokenType == ETokenType.COMMA || beforeTokenType == null || beforeTokenType == ETokenType.SCOPE || beforeTokenType == CONST); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy