// eu.cqse.check.framework.util.python.PythonVariableNameExtractor Maven / Gradle / Ivy
// Show all versions of teamscale-check-api Show documentation
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.cqse.check.framework.util.python;
import static eu.cqse.check.framework.scanner.ETokenType.AS;
import static eu.cqse.check.framework.scanner.ETokenType.COLON;
import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.EQ;
import static eu.cqse.check.framework.scanner.ETokenType.FOR;
import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
import static eu.cqse.check.framework.scanner.ETokenType.IN;
import static eu.cqse.check.framework.scanner.ETokenType.LAMBDA;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.MULT;
import static eu.cqse.check.framework.scanner.ETokenType.POWER;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import org.conqat.lib.commons.collections.CollectionUtils;
import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
import eu.cqse.check.framework.shallowparser.TokenStreamUtils;
import eu.cqse.check.framework.shallowparser.framework.ShallowEntity;
/**
* A class that extracts declared variable names from shallow entities in python. Currently
* extraction is implemented for.
*
*
* - expect statements
* - for statements
* - with statements
* - method parameters
* - lambda parameters
* - variable and attribute declarations (with destructuring)
*
*
* For loops within list comprehensions are currently not supported.
*/
public class PythonVariableNameExtractor {
/**
* Marker tokens in method argument lists, as of
* Python
* Documentation
*/
private static final EnumSet MARKER_TOKENS = EnumSet.of(MULT, ETokenType.SLASH);
/**
* Extracts declared variable names from the given entity.
*/
public List extractVariableNames(ShallowEntity entity) {
switch (entity.getType()) {
case ATTRIBUTE:
return extractFromDeclarationStatement(entity.ownStartTokens());
case STATEMENT:
return extractFromStatement(entity);
case METHOD:
return extractFromMethodOrLambda(entity);
default:
return CollectionUtils.emptyList();
}
}
/**
* Extracts declared variable names from the given statement.
*/
private static List extractFromStatement(ShallowEntity entity) {
switch (entity.getSubtype()) {
case SubTypeNames.SIMPLE_STATEMENT:
return extractFromDeclarationStatement(entity.ownStartTokens());
case SubTypeNames.FOR:
return extractFromForStatement(entity.ownStartTokens());
case SubTypeNames.WITH:
case SubTypeNames.EXCEPT:
return extractFromStatementWithAs(entity.ownStartTokens());
default:
return CollectionUtils.emptyList();
}
}
/**
* Extracts declared variable names from a declaration statement.
*/
private static List extractFromDeclarationStatement(List tokens) {
int index = TokenStreamUtils.findFirstTopLevel(tokens,
// EQ for regular assignment, COLON for variable annotation
EnumSet.of(EQ, COLON), Collections.singletonList(LPAREN), Collections.singletonList(RPAREN));
if (index == TokenStreamUtils.NOT_FOUND) {
// In this case there is no variable declaration
return CollectionUtils.emptyList();
}
List leftHandSideTokens = tokens.subList(0, index);
// If the left-hand-side contains any commas, it must be a tuple assignment
// (possible with or without parentheses/brackets)
// e.g. a,b = 1,2 ; [a, b] = [1, 2] ; (1,2) = (1,2)
if (TokenStreamUtils.containsAny(leftHandSideTokens, COMMA)) {
return extractIdentifiersFromTupleAssignment(leftHandSideTokens);
}
return extractIdentifiers(leftHandSideTokens);
}
/**
* Extract the identifiers from a tuple assignment which are actually used to declare a new
* variable.
*
* @param tokens
* list of tokens from the left-hand-side of a tuple assignment (possible nested with
* parentheses or brackets)
* @return list of tokens which are identifiers that are actually used as the declaration of a new
* variable
*/
private static List extractIdentifiersFromTupleAssignment(List tokens) {
// Remove parentheses/brackets at beginning and end, then recursive call with
// remaining tokens
if (TokenStreamUtils.startsWith(tokens, LPAREN)) {
tokens = TokenStreamUtils.removeAtFront(tokens, LPAREN);
tokens = TokenStreamUtils.removeAtEnd(tokens, RPAREN);
return extractIdentifiersFromTupleAssignment(tokens);
} else if (TokenStreamUtils.startsWith(tokens, LBRACK)) {
tokens = TokenStreamUtils.removeAtFront(tokens, LBRACK);
tokens = TokenStreamUtils.removeAtEnd(tokens, RBRACK);
return extractIdentifiersFromTupleAssignment(tokens);
}
// Plain tuple assignment (possibly still nested), e.g.
// a, (b, c) = 1, (2, 3)
if (TokenStreamUtils.contains(tokens, COMMA)) {
List> splitTokenLists = TokenStreamUtils.split(tokens, COMMA);
List> plainTokenLists = new ArrayList<>();
for (List tokenList : splitTokenLists) {
plainTokenLists.add(extractIdentifiersFromTupleAssignment(tokenList));
}
List identifiers = new ArrayList<>();
for (List plainTokenList : plainTokenLists) {
identifiers.addAll(extractIdentifiers(plainTokenList));
}
return identifiers;
}
// No more commas left
// -> we have reached the plain identifiers
// or access of attributes or array/dictionary entries, i.e. a.str or
// a[int("1")]
return extractIdentifiers(tokens);
}
/**
* Extracts declared variable names from a for statement.
*/
private static List extractFromForStatement(List tokens) {
return CollectionUtils.filter(TokenStreamUtils.tokensBetween(tokens, FOR, IN),
token -> token.getType() == IDENTIFIER);
}
/**
* Extracts declared variables names from a statement that introduces variables with as. This
* includes with and except statements.
*/
private static List extractFromStatementWithAs(List tokens) {
int index = TokenStreamUtils.firstTokenOfTypeSequence(tokens, 2, AS, IDENTIFIER);
if (index == TokenStreamUtils.NOT_FOUND) {
return CollectionUtils.emptyList();
}
return tokens.subList(index + 1, index + 2);
}
/**
* Extracts parameter names from a method or lambda.
*/
public List extractFromMethodOrLambda(ShallowEntity methodOrLambda) {
if (SubTypeNames.LAMBDA.equals(methodOrLambda.getSubtype())) {
return extractFromLambda(methodOrLambda);
}
return extractFromMethod(methodOrLambda);
}
/**
* Extracts parameter names from a lambda.
*/
private static List extractFromLambda(ShallowEntity lambda) {
return extractIdentifiers(TokenStreamUtils.tokensBetween(lambda.ownStartTokens(), LAMBDA, COLON));
}
/**
* Extracts parameter names from a method.
*/
private static List extractFromMethod(ShallowEntity method) {
List> splitParameterTokens = getSplitParameterTokens(method);
return CollectionUtils.filterAndMap(splitParameterTokens,
tokens -> !tokens.isEmpty() && !(tokens.size() == 1 && MARKER_TOKENS.contains(tokens.get(0).getType())),
tokens -> {
IToken idToken = tokens.get(0);
if (idToken.getType() == MULT || idToken.getType() == POWER) {
return tokens.get(1);
}
return idToken;
});
}
/**
* Returns the split parameter tokens from the given method. Returns a list of token list where each
* inner list represents all tokens of one parameter.
*/
private static List> getSplitParameterTokens(ShallowEntity method) {
List parameterTokens = TokenStreamUtils.tokensBetweenWithNesting(method.includedTokens(), 2, LPAREN,
RPAREN);
if (parameterTokens.isEmpty()) {
return CollectionUtils.emptyList();
}
List openingTypes = new ArrayList<>(Arrays.asList(LPAREN, LBRACK, LBRACE, LAMBDA));
List closingTypes = new ArrayList<>(Arrays.asList(RPAREN, RBRACK, RBRACE, COLON));
// we need to handle nesting between lambdas and colon to handle lambda
// default parameter properly
return TokenStreamUtils.splitWithNesting(parameterTokens, COMMA, openingTypes, closingTypes);
}
/**
* Extracts all identifiers from the given tokens.
*/
private static List extractIdentifiers(List tokens) {
// If the remaining token list contains
// - a bracket, it is an array or dictionary access
// - a parenthesis, it is a function call
// - a dot, it is an attribute access
// None of these declare a new variable.
if (TokenStreamUtils.containsAny(tokens, EnumSet.of(LBRACK, LPAREN, DOT))) {
return CollectionUtils.emptyList();
}
return CollectionUtils.filter(tokens, token -> token.getType() == IDENTIFIER);
}
}