eu.cqse.check.framework.util.tokens.TokenStreamTransformationPattern Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of teamscale-check-api Show documentation
Show all versions of teamscale-check-api Show documentation
The Teamscale Custom Check API allows users to extend Teamscale by writing custom analyses that create findings.
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.cqse.check.framework.util.tokens;
import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;

import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.collections.CollectionUtils;

import eu.cqse.check.framework.core.CheckException;
import eu.cqse.check.framework.scanner.ArtificialTokenOriginIds;
import eu.cqse.check.framework.scanner.ELanguage;
import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.scanner.ScannerUtils;
import eu.cqse.check.framework.shallowparser.TokenStreamUtils;
/**
 * Transforms a token stream according to a search and a replace pattern. Both patterns can contain
 * variables in the form of {@code $name}.
 * <p>
 * Example
 * <ul>
 * <li>Language: Java</li>
 * <li>Search Pattern: {@code assertNotNull($a)}</li>
 * <li>Replacement Pattern: {@code $a != null}</li>
 * <li>Input: {@code assertNotNull(foo)}</li>
 * <li>Output: {@code foo != null}</li>
 * </ul>
 * <p>
 * Variables will not match the semicolon token. Additionally, variables can be postfixed with a
 * number, e.g. {@code $a1} to signal that the code matched to {@code $a1} should only be
 * exactly 1 token in length. For C# a block {@code {}} around the result is removed when this
 * matches a {@code out Type Identifier} variable introduction to avoid scoping issues, see
 * {@link #removeBlockIfCSharpOutIntroducesVariable}.
 */
public class TokenStreamTransformationPattern {
/**
 * Prefix of pattern variables.
 */
private static final String VARIABLE_PREFIX = "$";

/** The search pattern transformed into matchers, in the order of the pattern tokens. */
private final List<IMatcher> matchers = new ArrayList<>();

/** The replacement pattern as tokens of the input language. */
private final List<IToken> replacementPatternTokens;

/** The language that was used to tokenize the search and the replacement pattern. */
private final ELanguage language;

/** Returned if the matcher did not match. */
public static final int NO_MATCH = -1;
/**
* Constructor.
*
* @throws CheckException
* if the search pattern has invalid syntax.
*/
public TokenStreamTransformationPattern(String searchPatternString, String replacementPatternString,
ELanguage language) throws CheckException {
List searchPatternTokens = ScannerUtils.getTokens(searchPatternString, language,
ArtificialTokenOriginIds.TOKEN_STREAM_TRANSFORMATION_PATTERN);
createMatchers(searchPatternTokens);
replacementPatternTokens = ScannerUtils.getTokens(replacementPatternString, language,
ArtificialTokenOriginIds.TOKEN_STREAM_TRANSFORMATION_PATTERN);
this.language = language;
}
/**
 * Creates matchers from the given search pattern tokens and appends them to {@link #matchers}.
 * <p>
 * An identifier starting with {@link #VARIABLE_PREFIX} becomes a {@link VariableMatcher} that ends
 * at the type of the following pattern token or at a semicolon. Every other token becomes a
 * {@link TokenTypeMatcher} for its type and text.
 *
 * @throws CheckException
 *             if the pattern has invalid syntax, i.e. its last token is a variable.
 */
private void createMatchers(List<IToken> searchPatternTokens) throws CheckException {
    for (int i = 0; i < searchPatternTokens.size(); i++) {
        IToken token = searchPatternTokens.get(i);
        String text = token.getText();
        ETokenType type = token.getType();

        boolean isVariable = type == IDENTIFIER && text.startsWith(VARIABLE_PREFIX);
        if (!isVariable) {
            matchers.add(new TokenTypeMatcher(type, text));
            continue;
        }

        // A variable needs a following token whose type delimits the variable's match.
        if (i + 1 >= searchPatternTokens.size()) {
            throw new CheckException("The last token in the search pattern may not be a variable!");
        }
        ETokenType endTokenType = searchPatternTokens.get(i + 1).getType();
        matchers.add(new VariableMatcher(text, EnumSet.of(endTokenType, ETokenType.SEMICOLON)));
    }
}
/**
 * Applies the pattern to the given tokens at the given position and returns the transformed
 * tokens together with the number of input tokens that were matched. In case the pattern does not
 * match, {@code null} is returned. For C# a block {@code {}} around the result is removed when
 * this matches a {@code out Type Identifier} variable introduction to avoid scoping issues, see
 * {@link #removeBlockIfCSharpOutIntroducesVariable}.
 */
private @Nullable Result apply(ELanguage language, List<IToken> tokens, int position) {
    Map<String, List<IToken>> variables = new HashMap<>();
    int matchedTokens = matchSearchPattern(tokens, position, variables);
    if (matchedTokens == NO_MATCH) {
        return null;
    }
    // The token at the match start provides offset, line and origin for the replacement tokens.
    List<IToken> result = createResult(tokens.get(position), variables);
    return removeBlockIfCSharpOutIntroducesVariable(language, result, variables, matchedTokens);
}
/**
* TS-38134 Removes block scope if a C# out parameter is used that introduces a new variable as
* otherwise that variable would have the wrong scope.
*/
private static @NonNull Result removeBlockIfCSharpOutIntroducesVariable(ELanguage language, List result,
Map> variables, int matchedTokens) {
if (language == ELanguage.CS && surroundedWithBlock(result)
&& containsCSharpOutVariableIntroduction(variables.values())) {
// remove block consisting of {} around result
return new Result(result.subList(1, result.size() - 1), matchedTokens);
} else {
return new Result(result, matchedTokens);
}
}
/**
 * Returns whether the tokens are surrounded by a block consisting of {@code {}}, i.e. whether the
 * first token is an opening brace and the last token is a closing brace. An empty list is not
 * surrounded by a block.
 */
private static boolean surroundedWithBlock(List<IToken> result) {
    if (result.isEmpty()) {
        return false;
    }
    IToken first = result.get(0);
    IToken last = result.get(result.size() - 1);
    return first.getType() == ETokenType.LBRACE && last.getType() == ETokenType.RBRACE;
}
/**
* Returns whether any of the variables contains a C# out variable introduction which has a pattern
* of {@code out Type Identifier}.
*/
private static boolean containsCSharpOutVariableIntroduction(Collection> variables) {
return variables.stream()
.anyMatch(t -> IntStream.range(0, t.size() - 2).anyMatch(i -> t.get(i).getType() == ETokenType.OUT
&& t.get(i + 2).getType().getTokenClass() == ETokenType.ETokenClass.IDENTIFIER));
}
/**
 * Applies the given patterns for the language of the token stream on it and replaces all matches
 * with the transformed tokens. For C# a block {@code {}} around the result is removed when this
 * matches a {@code out Type Identifier} variable introduction to avoid scoping issues, see
 * {@link #removeBlockIfCSharpOutIntroducesVariable}.
 *
 * @param tokens
 *            the token stream to transform; its language is taken from the first token.
 * @param patterns
 *            the candidate patterns. Patterns of other languages are ignored.
 * @return a list with the transformed tokens; the input list is not modified.
 */
public static List<IToken> applyPatterns(List<IToken> tokens, List<TokenStreamTransformationPattern> patterns) {
    if (tokens.isEmpty()) {
        return CollectionUtils.emptyList();
    }
    ELanguage language = tokens.get(0).getLanguage();

    // Discard patterns that cannot match once, instead of re-checking them at every position.
    // The language comparison runs first as it is cheaper than scanning the token stream.
    List<TokenStreamTransformationPattern> filteredPatterns = patterns.stream()
            .filter(p -> p.language == language).filter(p -> p.checkIfTypeMatcherMatches(tokens))
            .collect(Collectors.toList());
    if (filteredPatterns.isEmpty()) {
        return new ArrayList<>(tokens);
    }

    List<IToken> transformedTokens = new ArrayList<>();
    int position = 0;
    while (position < tokens.size()) {
        Result result = applyPatterns(language, tokens, filteredPatterns, position);
        if (result == null) {
            // No pattern matched here: keep the token and continue with the next one.
            transformedTokens.add(tokens.get(position));
            position += 1;
        } else {
            transformedTokens.addAll(result.getTransformedTokens());
            position += result.getMatchedTokens();
        }
    }
    return transformedTokens;
}
/**
 * Applies the given patterns for the given language at the given position. The result of the first
 * match is returned. If no pattern matches, {@code null} is returned. Patterns whose language
 * differs from the given language are skipped. For C# a block {@code {}} around the result is
 * removed when this matches a {@code out Type Identifier} variable introduction to avoid scoping
 * issues, see {@link #removeBlockIfCSharpOutIntroducesVariable}.
 */
private static @Nullable Result applyPatterns(ELanguage language, List<IToken> tokens,
        List<TokenStreamTransformationPattern> patterns, int position) {
    for (TokenStreamTransformationPattern pattern : patterns) {
        if (pattern.language == language) {
            Result result = pattern.apply(language, tokens, position);
            if (result != null) {
                return result;
            }
        }
    }
    return null;
}
/**
 * Checks if all {@link TokenTypeMatcher}s of {@link #matchers} have at least one match somewhere
 * in the given tokens. If not, we do not have to apply the pattern. This is an early exit based on
 * the assumption that most files do not contain the text we search, e.g. {@code getOrDefault}.
 * This way we can skip applying the more expensive {@link VariableMatcher}s in many cases.
 *
 * @return {@code false} if at least one {@link TokenTypeMatcher} has no match in the tokens,
 *         {@code true} otherwise.
 */
private boolean checkIfTypeMatcherMatches(List<IToken> tokens) {
    for (IMatcher matcher : matchers) {
        if (matcher instanceof TokenTypeMatcher && !((TokenTypeMatcher) matcher).hasAnyMatch(tokens)) {
            return false;
        }
    }
    return true;
}
/**
* Matches the matchers against the given tokens and returns the variable map created by the
* matchers. Returns {@link #NO_MATCH}
if the matchers do not match the token stream.
*/
private int matchSearchPattern(List tokens, int startPosition, Map> variables) {
int tokenPosition = startPosition;
for (IMatcher matcher : matchers) {
if (tokenPosition >= tokens.size()) {
return NO_MATCH;
}
int nextPosition = matcher.apply(tokens, tokenPosition, variables);
if (nextPosition == NO_MATCH) {
return NO_MATCH;
}
CCSMAssert.isTrue(nextPosition > tokenPosition, "Matcher did not advance token stream.");
tokenPosition = nextPosition;
}
return tokenPosition - startPosition;
}
/**
* Creates the result token list based on the given variable map.
*/
private List createResult(IToken baseToken, Map> variables) {
List result = new ArrayList<>();
for (IToken token : replacementPatternTokens) {
String text = token.getText();
if (token.getType() == IDENTIFIER && text.startsWith(VARIABLE_PREFIX)) {
List variableMatch = variables.get(text);
CCSMAssert.isNotNull(variableMatch, "Variable " + text + " was not matched");
result.addAll(variableMatch);
} else {
result.add(token.newToken(token.getType(), baseToken.getOffset(), baseToken.getLineNumber(),
token.getText(), baseToken.getOriginId()));
}
}
return result;
}
/**
 * @see #language
 */
public ELanguage getLanguage() {
    return this.language;
}
/** Result of successfully matching one pattern. */
private static class Result {
/** The transformed tokens. */
private final List transformedTokens;
/** The number of tokens that matched in the input token stream. */
private final int matchedTokens;
private Result(List transformedTokens, int matchedTokens) {
this.transformedTokens = transformedTokens;
this.matchedTokens = matchedTokens;
}
/**
* @see #matchedTokens
*/
private int getMatchedTokens() {
return matchedTokens;
}
/**
* @see #transformedTokens
*/
private List getTransformedTokens() {
return transformedTokens;
}
}
/** Matches part of the search pattern against the token stream. */
private interface IMatcher {
/**
* Tries to match this matcher at the given position in the token stream. May modify the given
* variables map. Returns {@link TokenStreamTransformationPattern#NO_MATCH} if the matcher does not
* apply at this position. Otherwise, returns the position where the next matcher should be applied.
*/
int apply(List tokens, int position, Map> variables);
}
/**
* Matches if the token at the current position has a certain type and text.
*/
private static class TokenTypeMatcher implements IMatcher {
/** The token type to match. */
private final ETokenType type;
/** The expected text. */
private final String text;
private TokenTypeMatcher(ETokenType type, String text) {
this.type = type;
this.text = text;
}
@Override
public int apply(List tokens, int position, Map> variables) {
IToken token = tokens.get(position);
if (token.getType() == type && token.getText().equals(text)) {
return position + 1;
}
return NO_MATCH;
}
private boolean hasAnyMatch(List tokens) {
IToken tokenByTypeAndText = TokenStreamUtils.getTokenByTypeAndText(tokens, text,
Collections.singleton(type));
return tokenByTypeAndText != null;
}
@Override
public String toString() {
return "TokenTypeMatcher[type=" + type + ",text=" + text + "]";
}
}
/**
* Matches a variable from the current position in the token stream to the first occurrence of the
* end token.
*/
private static class VariableMatcher implements IMatcher {
/** The pattern to find out how many tokens long a match should be */
private static final Pattern MATCH_LENGTH_PATTERN = Pattern.compile("\\$[a-zA-Z]+([0-9]+)");
/** The variable to match. */
private final String variableName;
/**
* The token type that signals the end of the variable match.
*/
private final Set endTokenType;
/**
* The number of tokens that the variable should match. This may be null to express that the current
* variable does not contain a variable count. This means, it will greedily match as many tokens as
* possible.
*/
private Integer numberOfTokensToMatch = null;
/**
* Indicates that {@link #endTokenType} has not been found in the token stream. Therefore, we don't
* have to search again the next time the matcher is applied.
*/
private boolean reachedEndOfTokenStream = false;
private VariableMatcher(String variableName, Set endTokenType) {
this.variableName = variableName;
this.endTokenType = endTokenType;
Matcher matcher = MATCH_LENGTH_PATTERN.matcher(variableName);
if (matcher.matches()) {
numberOfTokensToMatch = Integer.parseInt(matcher.group(1));
}
}
@Override
public int apply(List tokens, int position, Map> variables) {
// If we did not find anything the last time, we will not find anything this time.
if (reachedEndOfTokenStream) {
return NO_MATCH;
}
int endIndex = TokenStreamUtils.findFirstTopLevel(tokens, position, endTokenType,
List.of(ETokenType.LPAREN), List.of(ETokenType.RPAREN));
if (endIndex == TokenStreamUtils.NOT_FOUND) {
reachedEndOfTokenStream = true;
return NO_MATCH;
}
if (position == endIndex) {
return NO_MATCH;
}
if (numberOfTokensToMatch != null && endIndex > position + numberOfTokensToMatch) {
endIndex = position + numberOfTokensToMatch;
}
variables.put(variableName, tokens.subList(position, endIndex));
return endIndex;
}
@Override
public String toString() {
return "VariableMatcher[variableName=" + variableName + ",endTokenType=" + endTokenType + "]";
}
}
}