eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of teamscale-check-api Show documentation
The Teamscale Custom Check API allows users to extend Teamscale by writing custom analyses that create findings.
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.cqse.check.framework.shallowparser.languages.python;
import static eu.cqse.check.framework.scanner.ETokenType.AT;
import static eu.cqse.check.framework.scanner.ETokenType.CASE;
import static eu.cqse.check.framework.scanner.ETokenType.CLASS;
import static eu.cqse.check.framework.scanner.ETokenType.COLON;
import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
import static eu.cqse.check.framework.scanner.ETokenType.DEDENT;
import static eu.cqse.check.framework.scanner.ETokenType.DEF;
import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.ELIF;
import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
import static eu.cqse.check.framework.scanner.ETokenType.EOL;
import static eu.cqse.check.framework.scanner.ETokenType.EXCEPT;
import static eu.cqse.check.framework.scanner.ETokenType.FINALLY;
import static eu.cqse.check.framework.scanner.ETokenType.FOR;
import static eu.cqse.check.framework.scanner.ETokenType.FROM;
import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
import static eu.cqse.check.framework.scanner.ETokenType.IF;
import static eu.cqse.check.framework.scanner.ETokenType.IMPORT;
import static eu.cqse.check.framework.scanner.ETokenType.INDENT;
import static eu.cqse.check.framework.scanner.ETokenType.LAMBDA;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.MATCH;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
import static eu.cqse.check.framework.scanner.ETokenType.TRY;
import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
import static eu.cqse.check.framework.scanner.ETokenType.WITH;
import static eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates.ANY;
import static eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates.IN_CLASS;
import static eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates.IN_LAMBDA;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.collections.UnmodifiableSet;
import org.conqat.lib.commons.region.Region;
import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
import eu.cqse.check.framework.shallowparser.TokenStreamUtils;
import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
import eu.cqse.check.framework.shallowparser.framework.ParserState;
import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
import eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates;
/**
 * Shallow parser for Python.
 *
 * <p>
 * All parsing rules are registered once in the constructor. The parser distinguishes the states
 * declared in {@link EPythonParserStates}.
 */
public class PythonShallowParser extends ShallowParserBase<EPythonParserStates> {

    /**
     * All token types that can be used as identifier. {@code MATCH} and {@code CASE} are included
     * in addition to {@code IDENTIFIER}, so names such as {@code match} or {@code case} are
     * accepted wherever an identifier is expected.
     */
    private static final UnmodifiableSet<ETokenType> VALID_IDENTIFIERS = CollectionUtils
            .asUnmodifiable(EnumSet.of(IDENTIFIER, MATCH, CASE));

    /** The states used in this parser. */
    public enum EPythonParserStates {

        /**
         * Any state apart from in-class states. Typically, any construct can occur at any place.
         */
        ANY,

        /** State applying within a class scope. */
        IN_CLASS,

        /** Within a lambda expression. */
        IN_LAMBDA
    }

    /** Constructor. Registers all parsing rules. */
    public PythonShallowParser() {
        super(EPythonParserStates.class, EPythonParserStates.ANY);

        createErrorRules();
        createImportRules();
        createDecoratorRules();
        createClassRules();
        createFunctionRules();
        createLambdaRule();
        createStatementRules();
    }

    /** Creates rules that turn stray indent/dedent tokens into error (meta) nodes. */
    private void createErrorRules() {
        // unmatched indent/dedent: no endNode to keep the node incomplete
        inAnyState().sequence(INDENT).createNode(EShallowEntityType.META, "Unmatched indent");
        inAnyState().sequence(DEDENT).createNode(EShallowEntityType.META, "Unmatched dedent");
    }

    /** Creates parsing rules for imports ({@code import ...} and {@code from ... import ...}). */
    private void createImportRules() {
        inAnyState().sequence(EnumSet.of(IMPORT, FROM)).createNode(EShallowEntityType.META, 0).skipTo(EOL).endNode();
    }

    /** Creates parsing rules for decorators, i.e. {@code @name} with optional dotted parts. */
    private void createDecoratorRules() {
        inAnyState().sequence(AT, VALID_IDENTIFIERS).repeated(DOT, VALID_IDENTIFIERS)
                .createNode(EShallowEntityType.META, SubTypeNames.DECORATOR, new Region(1, -1))
                // decorator arguments may contain lambdas, which are parsed by the sub recognizer
                .skipNested(LPAREN, RPAREN, createLambdaSubRecognizer()).endNode();
    }

    /** Creates parsing rules for classes. */
    private void createClassRules() {
        RecognizerBase<EPythonParserStates> classAlternative = inAnyState().sequence(CLASS, VALID_IDENTIFIERS)
                .skipNested(LPAREN, RPAREN).sequence(COLON)
                .createNode(EShallowEntityType.TYPE, SubTypeNames.CLASS, 1);
        addBlockClosingAlternatives(classAlternative, IN_CLASS);
    }

    /**
     * Creates parsing rules for functions. Considers that type hinting could be used for parameters
     * and a function too. See PEP 0484
     */
    private void createFunctionRules() {
        // skipTo(COLON) after the parameter list also skips over a return type hint, if present
        RecognizerBase<EPythonParserStates> functionAlternative = inAnyState().sequence(DEF, VALID_IDENTIFIERS)
                .createNode(EShallowEntityType.METHOD, SubTypeNames.METHOD, 1).skipNested(LPAREN, RPAREN)
                .skipTo(COLON);
        addBlockClosingAlternatives(functionAlternative, ANY);
    }

    /** Creates parsing rules for statements. */
    private void createStatementRules() {
        // empty statement
        inAnyState().sequence(SEMICOLON).createNode(EShallowEntityType.STATEMENT, SubTypeNames.EMPTY_STATEMENT)
                .endNode();

        // block statements
        RecognizerBase<EPythonParserStates> ifAlternative = inAnyState()
                .sequence(EnumSet.of(IF, ELIF, ELSE, TRY, EXCEPT, FINALLY, WHILE, FOR, WITH))
                .createNode(EShallowEntityType.STATEMENT, 0)
                .skipToWithNesting(COLON, Arrays.asList(LBRACK, LBRACE, LPAREN), Arrays.asList(RBRACK, RBRACE, RPAREN));
        addBlockClosingAlternatives(ifAlternative, ANY);

        // (MATCH, CASE) block statements for valid identifiers. (TS-34832)
        // find COLON before EOL since it could find a COLON anywhere after the sequence
        RecognizerBase<EPythonParserStates> ifAlternativeValidIdentifiers = inAnyState()
                .sequence(EnumSet.of(MATCH, CASE)).preCondition(new NotAMethodCallRecognizer())
                .preCondition(createRecognizer(start -> start.skipBefore(EnumSet.of(COLON, EOL)).sequence(COLON)))
                .createNode(EShallowEntityType.STATEMENT, 0)
                .skipToWithNesting(COLON, Arrays.asList(LBRACK, LBRACE, LPAREN), Arrays.asList(RBRACK, RBRACE, RPAREN));
        addBlockClosingAlternatives(ifAlternativeValidIdentifiers, ANY);

        // remove any isolated EOLs
        inAnyState().sequence(EOL);

        // in class attributes
        inState(IN_CLASS).sequenceBefore(VALID_IDENTIFIERS).subRecognizer(new PythonAttributeRecognizer(), 1, 1)
                .endNode();

        // simple statement
        inState(ANY, IN_CLASS).sequenceBefore(EnumSet.complementOf(EnumSet.of(DEF, CLASS)))
                .subRecognizer(new PythonSimpleStatementRecognizer(), 1, 1).endNode();

        /*
         * Parse a simple statement inside a lambda. This is treated separately as in this case the
         * comma can occur as statement separator (CR#22933).
         */
        inState(IN_LAMBDA).sequenceBefore(EnumSet.complementOf(EnumSet.of(DEF, CLASS)))
                .subRecognizer(new PythonSimpleStatementRecognizer(true), 1, 1).endNode();
    }

    /** Creates a rule for parsing lambdas. */
    private void createLambdaRule() {
        inState(ANY).sequence(LAMBDA).skipTo(COLON).createNode(EShallowEntityType.METHOD, SubTypeNames.LAMBDA)
                .parseOnce(IN_LAMBDA)
                .sequenceBefore(EnumSet.of(RBRACK, RPAREN, DEDENT, EOL, COMMA, /* e.g. inside dictionary */RBRACE))
                .endNode();
    }

    /**
     * Creates a recognizer that detects a lambda expression and parses into it.
     *
     * @return the recognizer, which matches before a {@code LAMBDA} token and then parses once in
     *         state {@link EPythonParserStates#ANY}.
     */
    private RecognizerBase<EPythonParserStates> createLambdaSubRecognizer() {
        return createRecognizer(start -> start.sequenceBefore(LAMBDA).parseOnce(ANY));
    }

    /**
     * Adds two different rules for closing a block:
     * <ul>
     * <li>Closing a block by finding a dedent</li>
     * <li>Single line that ends with EOL, typically this means multiple statements on one line</li>
     * </ul>
     *
     * @param matchingAlternative
     *            The block recognizer to be closed.
     * @param innerBlockState
     *            The {@link EPythonParserStates} used within the block this method is closing.
     */
    private static void addBlockClosingAlternatives(RecognizerBase<EPythonParserStates> matchingAlternative,
            EPythonParserStates innerBlockState) {
        // multi-line block: EOL + INDENT opens it, DEDENT closes it
        matchingAlternative.sequence(EOL, INDENT).parseUntil(innerBlockState).sequence(DEDENT).endNode();
        // single-line block: body follows on the same line and is closed by the EOL
        matchingAlternative.parseUntil(innerBlockState).sequence(EOL).endNode();
    }

    /** {@inheritDoc} */
    @Override
    protected boolean isFilteredToken(IToken token, IToken previousToken) {
        if (super.isFilteredToken(token, previousToken)) {
            return true;
        }

        // Don't allow double EOLs
        return previousToken != null && previousToken.getType() == EOL && token.getType() == EOL;
    }

    /**
     * Precondition used after a {@code match}/{@code case} token: rejects the match if the token at
     * the start offset is an LPAREN that begins a method call (e.g. a call to a function named
     * {@code match()} or {@code case()}), and accepts it otherwise.
     */
    private static class NotAMethodCallRecognizer extends RecognizerBase<EPythonParserStates> {

        /**
         * @return {@code startOffset} if there is no token or no LPAREN at {@code startOffset},
         *         {@code NO_MATCH} if the parenthesized part is not followed by a COLON (i.e. it
         *         looks like a method call), and the result of the super implementation otherwise.
         */
        @Override
        protected int matchesLocally(ParserState<EPythonParserStates> parserState, List<IToken> tokens,
                int startOffset) {
            // Guard against running past the end of the token stream (e.g. the keyword is the
            // very last token); without an LPAREN there is nothing that could be a call.
            if (startOffset >= tokens.size()) {
                return startOffset;
            }

            if (tokens.get(startOffset).getType() != LPAREN) {
                return startOffset;
            }

            if (startOffset < 1) {
                return NO_MATCH;
            }

            // We will search for the closing parenthesis and then check if the next token is
            // not a COLON, in that case we are looking at someone calling a method named
            // case()/match(), otherwise it is probably a match followed by a tuple
            int closingParenIndex = TokenStreamUtils.findMatchingClosingToken(tokens, startOffset + 1, LPAREN,
                    RPAREN);
            if (closingParenIndex != TokenStreamUtils.NOT_FOUND && tokens.size() > closingParenIndex + 1
                    && tokens.get(closingParenIndex + 1).getType() != COLON) {
                return NO_MATCH;
            }
            return super.matchesLocally(parserState, tokens, startOffset);
        }
    }
}