eu.cqse.check.framework.shallowparser.languages.kotlin.KotlinStatementSubRecognizer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of teamscale-check-api Show documentation
The Teamscale Custom Check API allows users to extend Teamscale by writing custom analyses that create findings.
There is a newer version: 2024.7.2
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package eu.cqse.check.framework.shallowparser.languages.kotlin;

import static eu.cqse.check.framework.scanner.ETokenType.ANDAND;
import static eu.cqse.check.framework.scanner.ETokenType.DIV;
import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
import static eu.cqse.check.framework.scanner.ETokenType.ELVIS;
import static eu.cqse.check.framework.scanner.ETokenType.EOF;
import static eu.cqse.check.framework.scanner.ETokenType.EOL;
import static eu.cqse.check.framework.scanner.ETokenType.EQ;
import static eu.cqse.check.framework.scanner.ETokenType.EQEQ;
import static eu.cqse.check.framework.scanner.ETokenType.GT;
import static eu.cqse.check.framework.scanner.ETokenType.GTEQ;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LT;
import static eu.cqse.check.framework.scanner.ETokenType.LTEQ;
import static eu.cqse.check.framework.scanner.ETokenType.MINUS;
import static eu.cqse.check.framework.scanner.ETokenType.MOD;
import static eu.cqse.check.framework.scanner.ETokenType.MULT;
import static eu.cqse.check.framework.scanner.ETokenType.NOTEQ;
import static eu.cqse.check.framework.scanner.ETokenType.OR;
import static eu.cqse.check.framework.scanner.ETokenType.OROR;
import static eu.cqse.check.framework.scanner.ETokenType.PLUS;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SAFECALL_OPERATOR;
import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;

import java.util.EnumSet;
import java.util.List;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.framework.ParserState;
import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
import eu.cqse.check.framework.shallowparser.framework.RecognizerUtils;

/**
 * Recognizer for a single Kotlin statement that may span several lines.
 * <p>
 * Matching alternates between two steps until neither makes progress: the partial statement
 * recognizer consumes one line's worth of tokens, and the statement continuation recognizer decides
 * whether the line break that follows still belongs to the statement (the line ends with a binary
 * operator, or the next non-empty line starts with a binary operator or an opening brace).
 * <p>
 * NOTE(review): {@code List}, {@code RecognizerBase} and {@code ParserState} are used without type
 * arguments in the overridden signatures. Confirm against the framework declarations before
 * parameterizing them; an overriding method must keep an override-equivalent signature. Token access
 * therefore goes through {@link #hasTypeAt(List, int, ETokenType)} and
 * {@link #isBinaryOperatorAt(List, int)}, which perform the {@link IToken} cast in one place.
 */
public class KotlinStatementSubRecognizer extends RecognizerBase {

	/**
	 * Token types at which a partial (single line) statement stops. {@code LT} is part of this set;
	 * the generic recognizer built in the constructor starts with {@code LT} and resumes matching
	 * from there.
	 */
	private static final EnumSet<ETokenType> STATEMENT_SEPARATORS = EnumSet.of(EOL, SEMICOLON, RBRACE, EOF, LT,
			RPAREN, RBRACK, ELSE);

	/**
	 * Binary operators which can cause a statement to continue on the next line, even though an EOL
	 * has been found.
	 */
	private static final EnumSet<ETokenType> BINARY_OPERATORS = EnumSet.of(DOT, SAFECALL_OPERATOR, ELVIS, PLUS,
			MINUS, MULT, DIV, MOD, ANDAND, OROR, EQEQ, NOTEQ, GT, LT, LTEQ, GTEQ, OR, EQ);

	/** Matches a partial statement, i.e. the part of a statement located on a single line. */
	private final RecognizerBase partialStatementRecognizer;

	/**
	 * Matches the end of a line together with the tokens that allow the statement to continue on a
	 * following line.
	 */
	private final RecognizerBase statementContinuationRecognizer = new RecognizerBase() {

		/**
		 * Expects to be invoked at the position where a partial statement ended. Returns
		 * {@code NO_MATCH} unless the token at {@code startOffset} is an EOL. For an EOL the statement
		 * continues in two cases:
		 * <ul>
		 * <li>the token directly before the EOL is a binary operator; the result is the offset of the
		 * first non-EOL token after {@code startOffset}, or {@code tokens.size()} if only EOLs
		 * follow,</li>
		 * <li>the first non-EOL token after the line break is a binary operator or {@code LBRACE}; the
		 * result is the offset of that token.</li>
		 * </ul>
		 * In every other case {@code NO_MATCH} is returned.
		 */
		@Override
		protected int matchesLocally(ParserState parserState, List tokens, int startOffset) {
			if (startOffset >= tokens.size() || !hasTypeAt(tokens, startOffset, EOL)) {
				return NO_MATCH;
			}

			int firstNonEolOffset = skipEOLs(tokens, startOffset);

			// A trailing binary operator keeps the statement open. The guard only has to protect the
			// access to index startOffset - 1, so the token at index 0 is a valid predecessor as well.
			if (startOffset > 0 && isBinaryOperatorAt(tokens, startOffset - 1)) {
				return firstNonEolOffset;
			}

			if (firstNonEolOffset >= tokens.size()) {
				return NO_MATCH;
			}

			// skipEOLs guarantees that the token at firstNonEolOffset is not an EOL, so only the
			// "leading operator or opening brace" condition remains to be checked.
			if (isBinaryOperatorAt(tokens, firstNonEolOffset) || hasTypeAt(tokens, firstNonEolOffset, LBRACE)) {
				return firstNonEolOffset;
			}
			return NO_MATCH;
		}

		/**
		 * Skips EOL tokens in the given token list, starting at {@code startOffset}, and returns the
		 * index of the first non-EOL token. Returns {@code tokens.size()} if only EOL tokens remain.
		 * Implicitly expects {@code startOffset} to be non-negative.
		 */
		private int skipEOLs(List tokens, int startOffset) {
			int offset = startOffset;
			while (offset < tokens.size() && hasTypeAt(tokens, offset, EOL)) {
				offset++;
			}
			return offset;
		}

		/** {@inheritDoc} */
		@Override
		protected String getRecognizerStringRepresentation() {
			return "statement continuation";
		}
	};

	/**
	 * Constructor.
	 * <p>
	 * Builds the partial statement recognizer: it skips to the next statement separator (respecting
	 * nested brackets), then repeatedly applies a recognizer for constructs starting with {@code LT},
	 * and finally accepts an optional semicolon. The {@code LT} recognizer has two alternatives: a
	 * {@code LT ... GT} sequence containing only token types valid inside generics, or a plain
	 * {@code LT} (presumably the comparison operator); both are followed by another skip to the next
	 * statement separator.
	 * 
	 * @param subExpressionRecognizer
	 *            Recognizer to be used to find nested lambdas etc. in the statement
	 * @param openingBrackets
	 *            List of opening brackets, which are skipped
	 * @param closingBrackets
	 *            List of closing brackets, which are skipped
	 */
	public KotlinStatementSubRecognizer(RecognizerBase subExpressionRecognizer,
			List openingBrackets, List closingBrackets) {
		RecognizerBase genericRecognizer = RecognizerUtils.createRecognizer(start -> {
			start.sequence(LT).repeated(KotlinShallowParser.VALID_INSIDE_GENERIC_TOKEN_TYPES).sequence(GT)
					.skipBeforeWithNesting(STATEMENT_SEPARATORS, openingBrackets, closingBrackets,
							subExpressionRecognizer);
			start.sequence(LT).skipBeforeWithNesting(STATEMENT_SEPARATORS, openingBrackets, closingBrackets,
					subExpressionRecognizer);
		});
		partialStatementRecognizer = RecognizerUtils.createRecognizer(start -> start
				.skipBeforeWithNesting(STATEMENT_SEPARATORS, openingBrackets, closingBrackets, subExpressionRecognizer)
				.repeatedSubRecognizer(genericRecognizer).optional(SEMICOLON));
	}

	/**
	 * Alternates between the partial statement recognizer and the statement continuation recognizer
	 * until one of them fails or makes no progress. This method never returns {@code NO_MATCH}; the
	 * result is the offset reached so far, which is {@code startOffset} if nothing could be matched.
	 */
	@Override
	protected int matchesLocally(ParserState parserState, List tokens, int startOffset) {
		int currentOffset = startOffset;

		while (true) {
			int partialEnd = partialStatementRecognizer.matches(parserState, tokens, currentOffset);
			if (partialEnd == NO_MATCH) {
				return currentOffset;
			}
			// An empty partial match is only acceptable directly in front of an EOL, where the
			// continuation recognizer may still extend the statement. The bounds check avoids reading
			// past the end of the token list: the continuation recognizer can hand back
			// tokens.size() when a line ends with a binary operator and only EOLs follow.
			if (partialEnd == currentOffset
					&& (currentOffset >= tokens.size() || !hasTypeAt(tokens, currentOffset, EOL))) {
				return currentOffset;
			}
			currentOffset = partialEnd;

			int continuationEnd = statementContinuationRecognizer.matches(parserState, tokens, currentOffset);
			if (continuationEnd == NO_MATCH || continuationEnd == currentOffset) {
				return currentOffset;
			}
			currentOffset = continuationEnd;
		}
	}

	/** {@inheritDoc} */
	@Override
	protected String getRecognizerStringRepresentation() {
		return super.getRecognizerStringRepresentation() + "[" + partialStatementRecognizer.toString() + ", "
				+ statementContinuationRecognizer.toString() + "]";
	}

	/**
	 * Returns whether the token at {@code index} has the given type. The caller must ensure that
	 * {@code index} is a valid index of {@code tokens}.
	 */
	private static boolean hasTypeAt(List tokens, int index, ETokenType type) {
		return ((IToken) tokens.get(index)).getType() == type;
	}

	/**
	 * Returns whether the token at {@code index} is one of the {@link #BINARY_OPERATORS}. The caller
	 * must ensure that {@code index} is a valid index of {@code tokens}.
	 */
	private static boolean isBinaryOperatorAt(List tokens, int index) {
		return BINARY_OPERATORS.contains(((IToken) tokens.get(index)).getType());
	}
}