All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.cqse.check.framework.util.tokens.TokenStreamTransformationPattern Maven / Gradle / Ivy

Go to download

The Teamscale Custom Check API allows users to extend Teamscale by writing custom analyses that create findings.

There is a newer version: 2024.7.2
Show newest version
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package eu.cqse.check.framework.util.tokens;

import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.collections.CollectionUtils;

import eu.cqse.check.framework.core.CheckException;
import eu.cqse.check.framework.scanner.ArtificialTokenOriginIds;
import eu.cqse.check.framework.scanner.ELanguage;
import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.scanner.ScannerUtils;
import eu.cqse.check.framework.shallowparser.TokenStreamUtils;

/**
 * Transforms a token stream according to a search and a replace pattern. Both patterns can contain
 * variables in the form of $name.
 *
 * 

Example

*
    *
  • Language: Java *
  • Search Pattern: assertNotNull($a) *
  • Replacement Pattern: $a != null *
  • Input: assertNotNull(foo) *
  • Output: foo != null *
* * Variables will not match the semicolon token. Additionally, variables can be postfixed with a * number, e.g. $a1 to signal that the code matched to $a1 should only be * exactly 1 token in length. For C# a block {@code {}} around the result is removed when this * matches a {@code out Type Identifier} variable introduction to avoid scoping issues, see * {@link #removeBlockIfCSharpOutIntroducesVariable}. */ public class TokenStreamTransformationPattern { /** * Prefix of pattern variables. */ private static final String VARIABLE_PREFIX = "$"; /** The search pattern transformed into matchers. */ private final List matchers = new ArrayList<>(); /** The replacement pattern as tokens of the input language. */ private final List replacementPatternTokens; private final ELanguage language; /** Returned if the matcher did not match. */ public static final int NO_MATCH = -1; /** * Constructor. * * @throws CheckException * if the search pattern has invalid syntax. */ public TokenStreamTransformationPattern(String searchPatternString, String replacementPatternString, ELanguage language) throws CheckException { List searchPatternTokens = ScannerUtils.getTokens(searchPatternString, language, ArtificialTokenOriginIds.TOKEN_STREAM_TRANSFORMATION_PATTERN); createMatchers(searchPatternTokens); replacementPatternTokens = ScannerUtils.getTokens(replacementPatternString, language, ArtificialTokenOriginIds.TOKEN_STREAM_TRANSFORMATION_PATTERN); this.language = language; } /** * Creates matchers from the given search pattern tokens. * * @throws CheckException * if the pattern has invalid syntax. */ private void createMatchers(List searchPatternTokens) throws CheckException { for (int i = 0; i < searchPatternTokens.size(); i++) { IToken token = searchPatternTokens.get(i); String text = token.getText(); ETokenType type = token.getType(); if (type == IDENTIFIER && text.startsWith(VARIABLE_PREFIX)) { if (i + 1 < searchPatternTokens.size()) { ETokenType endTokenType = searchPatternTokens.get(i + 1).getType(); matchers.add(new VariableMatcher(text, EnumSet.of(endTokenType, ETokenType.SEMICOLON))); } else { throw new CheckException("The last token in the search pattern may not be a variable!"); } } else { matchers.add(new TokenTypeMatcher(type, text)); } } } /** * Applies the pattern to the given tokens and returns the transformed token list. In case the * pattern does not match, null is returned. For C# a block {@code {}} around the * result is removed when this matches a {@code out Type Identifier} variable introduction to avoid * scoping issues, see {@link #removeBlockIfCSharpOutIntroducesVariable}. */ private @Nullable Result apply(ELanguage language, List tokens, int position) { Map> variables = new HashMap<>(); int matchedTokens = matchSearchPattern(tokens, position, variables); if (matchedTokens == NO_MATCH) { return null; } List result = createResult(tokens.get(position), variables); return removeBlockIfCSharpOutIntroducesVariable(language, result, variables, matchedTokens); } /** * TS-38134 Removes block scope if a C# out parameter is used that introduces a new variable as * otherwise that variable would have the wrong scope. */ private static @NonNull Result removeBlockIfCSharpOutIntroducesVariable(ELanguage language, List result, Map> variables, int matchedTokens) { if (language == ELanguage.CS && surroundedWithBlock(result) && containsCSharpOutVariableIntroduction(variables.values())) { // remove block consisting of {} around result return new Result(result.subList(1, result.size() - 1), matchedTokens); } else { return new Result(result, matchedTokens); } } /** * Returns whether the tokens are surrounded by a block consisting of {@code {}} */ private static boolean surroundedWithBlock(List result) { return !result.isEmpty() && result.get(0).getType() == ETokenType.LBRACE && result.get(result.size() - 1).getType() == ETokenType.RBRACE; } /** * Returns whether any of the variables contains a C# out variable introduction which has a pattern * of {@code out Type Identifier}. */ private static boolean containsCSharpOutVariableIntroduction(Collection> variables) { return variables.stream() .anyMatch(t -> IntStream.range(0, t.size() - 2).anyMatch(i -> t.get(i).getType() == ETokenType.OUT && t.get(i + 2).getType().getTokenClass() == ETokenType.ETokenClass.IDENTIFIER)); } /** * Applies the given patterns for the language of the token stream on it and replaces all matches * with the transformed tokens. For C# a block {@code {}} around the result is removed when this * matches a {@code out Type Identifier} variable introduction to avoid scoping issues, see * {@link #removeBlockIfCSharpOutIntroducesVariable}. */ public static List applyPatterns(List tokens, List patterns) { if (tokens.isEmpty()) { return CollectionUtils.emptyList(); } ELanguage language = tokens.get(0).getLanguage(); List transformedTokens = new ArrayList<>(); int position = 0; List filteredPatterns = patterns.stream() .filter(p -> p.checkIfTypeMatcherMatches(tokens)).collect(Collectors.toList()); while (position < tokens.size()) { Result result = applyPatterns(language, tokens, filteredPatterns, position); if (result == null) { transformedTokens.add(tokens.get(position)); position += 1; } else { transformedTokens.addAll(result.getTransformedTokens()); position += result.getMatchedTokens(); } } return transformedTokens; } /** * Applies the given patterns for the given language at the given position. The result of the first * match is returned. If no pattern matches, null is returned. For C# a block * {@code {}} around the result is removed when this matches a {@code out Type Identifier} variable * introduction to avoid scoping issues, see {@link #removeBlockIfCSharpOutIntroducesVariable}. */ private static @Nullable Result applyPatterns(ELanguage language, List tokens, List patterns, int position) { for (TokenStreamTransformationPattern pattern : patterns) { if (pattern.language != language) { continue; } Result result = pattern.apply(language, tokens, position); if (result != null) { return result; } } return null; } /** * Checks, if all {@link TokenTypeMatcher}s of {@link #matchers} have at least one match. If not, * there we do not have to apply the pattern. This is an early exit based on the assumption, that * most files do not contain the text we search , e.g. {@code getOrDefault}. This way we can skip * applying the more expensive {@link VariableMatcher}s in many cases. */ private boolean checkIfTypeMatcherMatches(List tokens) { for (IMatcher matcher : matchers) { if (matcher instanceof TokenTypeMatcher) { TokenTypeMatcher tokenTypeMatcher = (TokenTypeMatcher) matcher; boolean matches = tokenTypeMatcher.hasAnyMatch(tokens); if (!matches) { return false; } } } return true; } /** * Matches the matchers against the given tokens and returns the variable map created by the * matchers. Returns {@link #NO_MATCH} if the matchers do not match the token stream. */ private int matchSearchPattern(List tokens, int startPosition, Map> variables) { int tokenPosition = startPosition; for (IMatcher matcher : matchers) { if (tokenPosition >= tokens.size()) { return NO_MATCH; } int nextPosition = matcher.apply(tokens, tokenPosition, variables); if (nextPosition == NO_MATCH) { return NO_MATCH; } CCSMAssert.isTrue(nextPosition > tokenPosition, "Matcher did not advance token stream."); tokenPosition = nextPosition; } return tokenPosition - startPosition; } /** * Creates the result token list based on the given variable map. */ private List createResult(IToken baseToken, Map> variables) { List result = new ArrayList<>(); for (IToken token : replacementPatternTokens) { String text = token.getText(); if (token.getType() == IDENTIFIER && text.startsWith(VARIABLE_PREFIX)) { List variableMatch = variables.get(text); CCSMAssert.isNotNull(variableMatch, "Variable " + text + " was not matched"); result.addAll(variableMatch); } else { result.add(token.newToken(token.getType(), baseToken.getOffset(), baseToken.getLineNumber(), token.getText(), baseToken.getOriginId())); } } return result; } public ELanguage getLanguage() { return language; } /** Result of successfully matching one pattern. */ private static class Result { /** The transformed tokens. */ private final List transformedTokens; /** The number of tokens that matched in the input token stream. */ private final int matchedTokens; private Result(List transformedTokens, int matchedTokens) { this.transformedTokens = transformedTokens; this.matchedTokens = matchedTokens; } /** * @see #matchedTokens */ private int getMatchedTokens() { return matchedTokens; } /** * @see #transformedTokens */ private List getTransformedTokens() { return transformedTokens; } } /** Matches part of the search pattern against the token stream. */ private interface IMatcher { /** * Tries to match this matcher at the given position in the token stream. May modify the given * variables map. Returns {@link TokenStreamTransformationPattern#NO_MATCH} if the matcher does not * apply at this position. Otherwise, returns the position where the next matcher should be applied. */ int apply(List tokens, int position, Map> variables); } /** * Matches if the token at the current position has a certain type and text. */ private static class TokenTypeMatcher implements IMatcher { /** The token type to match. */ private final ETokenType type; /** The expected text. */ private final String text; private TokenTypeMatcher(ETokenType type, String text) { this.type = type; this.text = text; } @Override public int apply(List tokens, int position, Map> variables) { IToken token = tokens.get(position); if (token.getType() == type && token.getText().equals(text)) { return position + 1; } return NO_MATCH; } private boolean hasAnyMatch(List tokens) { IToken tokenByTypeAndText = TokenStreamUtils.getTokenByTypeAndText(tokens, text, Collections.singleton(type)); return tokenByTypeAndText != null; } @Override public String toString() { return "TokenTypeMatcher[type=" + type + ",text=" + text + "]"; } } /** * Matches a variable from the current position in the token stream to the first occurrence of the * end token. */ private static class VariableMatcher implements IMatcher { /** The pattern to find out how many tokens long a match should be */ private static final Pattern MATCH_LENGTH_PATTERN = Pattern.compile("\\$[a-zA-Z]+([0-9]+)"); /** The variable to match. */ private final String variableName; /** * The token type that signals the end of the variable match. */ private final Set endTokenType; /** * The number of tokens that the variable should match. This may be null to express that the current * variable does not contain a variable count. This means, it will greedily match as many tokens as * possible. */ private Integer numberOfTokensToMatch = null; /** * Indicates that {@link #endTokenType} has not been found in the token stream. Therefore, we don't * have to search again the next time the matcher is applied. */ private boolean reachedEndOfTokenStream = false; private VariableMatcher(String variableName, Set endTokenType) { this.variableName = variableName; this.endTokenType = endTokenType; Matcher matcher = MATCH_LENGTH_PATTERN.matcher(variableName); if (matcher.matches()) { numberOfTokensToMatch = Integer.parseInt(matcher.group(1)); } } @Override public int apply(List tokens, int position, Map> variables) { // If we did not find anything the last time, we will not find anything this time. if (reachedEndOfTokenStream) { return NO_MATCH; } int endIndex = TokenStreamUtils.findFirstTopLevel(tokens, position, endTokenType, List.of(ETokenType.LPAREN), List.of(ETokenType.RPAREN)); if (endIndex == TokenStreamUtils.NOT_FOUND) { reachedEndOfTokenStream = true; return NO_MATCH; } if (position == endIndex) { return NO_MATCH; } if (numberOfTokensToMatch != null && endIndex > position + numberOfTokensToMatch) { endIndex = position + numberOfTokensToMatch; } variables.put(variableName, tokens.subList(position, endIndex)); return endIndex; } @Override public String toString() { return "VariableMatcher[variableName=" + variableName + ",endTokenType=" + endTokenType + "]"; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy