All downloads are free. Search and download functionality uses the official Maven repository.

eu.cqse.check.framework.util.python.PythonVariableNameExtractor Maven / Gradle / Ivy

Go to download

The Teamscale Custom Check API allows users to extend Teamscale by writing custom analyses that create findings.

There is a newer version: 2024.7.2
Show newest version
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package eu.cqse.check.framework.util.python;

import static eu.cqse.check.framework.scanner.ETokenType.AS;
import static eu.cqse.check.framework.scanner.ETokenType.COLON;
import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.EQ;
import static eu.cqse.check.framework.scanner.ETokenType.FOR;
import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
import static eu.cqse.check.framework.scanner.ETokenType.IN;
import static eu.cqse.check.framework.scanner.ETokenType.LAMBDA;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.MULT;
import static eu.cqse.check.framework.scanner.ETokenType.POWER;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;

import org.conqat.lib.commons.collections.CollectionUtils;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
import eu.cqse.check.framework.shallowparser.TokenStreamUtils;
import eu.cqse.check.framework.shallowparser.framework.ShallowEntity;

/**
 * A class that extracts declared variable names from shallow entities in python. Currently
 * extraction is implemented for.
 *
 * 
    *
  • expect statements
  • *
  • for statements
  • *
  • with statements
  • *
  • method parameters
  • *
  • lambda parameters
  • *
  • variable and attribute declarations (with destructuring)
  • *
*

* For loops within list comprehensions are currently not supported. */ public class PythonVariableNameExtractor { /** * Marker tokens in method argument lists, as of * Python * Documentation */ private static final EnumSet MARKER_TOKENS = EnumSet.of(MULT, ETokenType.SLASH); /** * Extracts declared variable names from the given entity. */ public List extractVariableNames(ShallowEntity entity) { switch (entity.getType()) { case ATTRIBUTE: return extractFromDeclarationStatement(entity.ownStartTokens()); case STATEMENT: return extractFromStatement(entity); case METHOD: return extractFromMethodOrLambda(entity); default: return CollectionUtils.emptyList(); } } /** * Extracts declared variable names from the given statement. */ private static List extractFromStatement(ShallowEntity entity) { switch (entity.getSubtype()) { case SubTypeNames.SIMPLE_STATEMENT: return extractFromDeclarationStatement(entity.ownStartTokens()); case SubTypeNames.FOR: return extractFromForStatement(entity.ownStartTokens()); case SubTypeNames.WITH: case SubTypeNames.EXCEPT: return extractFromStatementWithAs(entity.ownStartTokens()); default: return CollectionUtils.emptyList(); } } /** * Extracts declared variable names from a declaration statement. */ private static List extractFromDeclarationStatement(List tokens) { int index = TokenStreamUtils.findFirstTopLevel(tokens, // EQ for regular assignment, COLON for variable annotation EnumSet.of(EQ, COLON), Collections.singletonList(LPAREN), Collections.singletonList(RPAREN)); if (index == TokenStreamUtils.NOT_FOUND) { // In this case there is no variable declaration return CollectionUtils.emptyList(); } List leftHandSideTokens = tokens.subList(0, index); // If the left-hand-side contains any commas, it must be a tuple assignment // (possible with or without parentheses/brackets) // e.g. 
a,b = 1,2 ; [a, b] = [1, 2] ; (1,2) = (1,2) if (TokenStreamUtils.containsAny(leftHandSideTokens, COMMA)) { return extractIdentifiersFromTupleAssignment(leftHandSideTokens); } return extractIdentifiers(leftHandSideTokens); } /** * Extract the identifiers from a tuple assignment which are actually used to declare a new * variable. * * @param tokens * list of tokens from the left-hand-side of a tuple assignment (possible nested with * parentheses or brackets) * @return list of tokens which are identifiers that are actually used as the declaration of a new * variable */ private static List extractIdentifiersFromTupleAssignment(List tokens) { // Remove parentheses/brackets at beginning and end, then recursive call with // remaining tokens if (TokenStreamUtils.startsWith(tokens, LPAREN)) { tokens = TokenStreamUtils.removeAtFront(tokens, LPAREN); tokens = TokenStreamUtils.removeAtEnd(tokens, RPAREN); return extractIdentifiersFromTupleAssignment(tokens); } else if (TokenStreamUtils.startsWith(tokens, LBRACK)) { tokens = TokenStreamUtils.removeAtFront(tokens, LBRACK); tokens = TokenStreamUtils.removeAtEnd(tokens, RBRACK); return extractIdentifiersFromTupleAssignment(tokens); } // Plain tuple assignment (possibly still nested), e.g. // a, (b, c) = 1, (2, 3) if (TokenStreamUtils.contains(tokens, COMMA)) { List> splitTokenLists = TokenStreamUtils.split(tokens, COMMA); List> plainTokenLists = new ArrayList<>(); for (List tokenList : splitTokenLists) { plainTokenLists.add(extractIdentifiersFromTupleAssignment(tokenList)); } List identifiers = new ArrayList<>(); for (List plainTokenList : plainTokenLists) { identifiers.addAll(extractIdentifiers(plainTokenList)); } return identifiers; } // No more commas left // -> we have reached the plain identifiers // or access of attributes or array/dictionary entries, i.e. a.str or // a[int("1")] return extractIdentifiers(tokens); } /** * Extracts declared variable names from a for statement. 
*/ private static List extractFromForStatement(List tokens) { return CollectionUtils.filter(TokenStreamUtils.tokensBetween(tokens, FOR, IN), token -> token.getType() == IDENTIFIER); } /** * Extracts declared variables names from a statement that introduces variables with as. This * includes with and except statements. */ private static List extractFromStatementWithAs(List tokens) { int index = TokenStreamUtils.firstTokenOfTypeSequence(tokens, 2, AS, IDENTIFIER); if (index == TokenStreamUtils.NOT_FOUND) { return CollectionUtils.emptyList(); } return tokens.subList(index + 1, index + 2); } /** * Extracts parameter names from a method or lambda. */ public List extractFromMethodOrLambda(ShallowEntity methodOrLambda) { if (SubTypeNames.LAMBDA.equals(methodOrLambda.getSubtype())) { return extractFromLambda(methodOrLambda); } return extractFromMethod(methodOrLambda); } /** * Extracts parameter names from a lambda. */ private static List extractFromLambda(ShallowEntity lambda) { return extractIdentifiers(TokenStreamUtils.tokensBetween(lambda.ownStartTokens(), LAMBDA, COLON)); } /** * Extracts parameter names from a method. */ private static List extractFromMethod(ShallowEntity method) { List> splitParameterTokens = getSplitParameterTokens(method); return CollectionUtils.filterAndMap(splitParameterTokens, tokens -> !tokens.isEmpty() && !(tokens.size() == 1 && MARKER_TOKENS.contains(tokens.get(0).getType())), tokens -> { IToken idToken = tokens.get(0); if (idToken.getType() == MULT || idToken.getType() == POWER) { return tokens.get(1); } return idToken; }); } /** * Returns the split parameter tokens from the given method. Returns a list of token list where each * inner list represents all tokens of one parameter. 
*/ private static List> getSplitParameterTokens(ShallowEntity method) { List parameterTokens = TokenStreamUtils.tokensBetweenWithNesting(method.includedTokens(), 2, LPAREN, RPAREN); if (parameterTokens.isEmpty()) { return CollectionUtils.emptyList(); } List openingTypes = new ArrayList<>(Arrays.asList(LPAREN, LBRACK, LBRACE, LAMBDA)); List closingTypes = new ArrayList<>(Arrays.asList(RPAREN, RBRACK, RBRACE, COLON)); // we need to handle nesting between lambdas and colon to handle lambda // default parameter properly return TokenStreamUtils.splitWithNesting(parameterTokens, COMMA, openingTypes, closingTypes); } /** * Extracts all identifiers from the given tokens. */ private static List extractIdentifiers(List tokens) { // If the remaining token list contains // - a bracket, it is an array or dictionary access // - a parenthesis, it is a function call // - a dot, it is an attribute access // None of these declare a new variable. if (TokenStreamUtils.containsAny(tokens, EnumSet.of(LBRACK, LPAREN, DOT))) { return CollectionUtils.emptyList(); } return CollectionUtils.filter(tokens, token -> token.getType() == IDENTIFIER); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy