All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.cqse.check.framework.shallowparser.languages.javascript.JavaScriptSimpleStatementRecognizer Maven / Gradle / Ivy

/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package eu.cqse.check.framework.shallowparser.languages.javascript;

import static eu.cqse.check.framework.scanner.ETokenType.BREAK;
import static eu.cqse.check.framework.scanner.ETokenType.COLON;
import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE;
import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE_ARROW;
import static eu.cqse.check.framework.scanner.ETokenType.FUNCTION;
import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.MINUSMINUS;
import static eu.cqse.check.framework.scanner.ETokenType.PLUSPLUS;
import static eu.cqse.check.framework.scanner.ETokenType.QUESTION;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
import static eu.cqse.check.framework.scanner.ETokenType.TEMPLATE_LITERAL;
import static eu.cqse.check.framework.scanner.ETokenType.THROW;
import static eu.cqse.check.framework.shallowparser.languages.javascript.JavaScriptShallowParser.EJavaScriptParserStates.ANY;
import static eu.cqse.check.framework.shallowparser.languages.javascript.JavaScriptShallowParser.EJavaScriptParserStates.IN_TYPESCRIPT_TYPE;

import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Stack;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.TokenStreamUtils;
import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
import eu.cqse.check.framework.shallowparser.languages.base.LineBasedStatementRecognizerBase;
import eu.cqse.check.framework.shallowparser.languages.javascript.JavaScriptShallowParser.EJavaScriptParserStates;

/**
 * Recognizer for simple statements in JavaScript. We need a separate recognizer as the rules for
 * statement continuation are non-trivial due to the optional semicolon. The governing rules are the
 * "Automatic Semicolon Insertion" rules of the ECMAScript language specification;
 * {@link #startsNewStatement(IToken, IToken)} implements a line-based heuristic for them.
 */
/* package */class JavaScriptSimpleStatementRecognizer
		extends LineBasedStatementRecognizerBase<EJavaScriptParserStates> {

	/** Token types that open a nesting level (parenthesis, brace, bracket). */
	private static final EnumSet<ETokenType> OPENING_BRACKETS = EnumSet.of(LPAREN, LBRACE, LBRACK);

	/** Token types that close a nesting level (parenthesis, brace, bracket). */
	private static final EnumSet<ETokenType> CLOSING_BRACKETS = EnumSet.of(RPAREN, RBRACE, RBRACK);

	/**
	 * Token types that, when directly following a "?", indicate that the "?" is not a ternary
	 * operator: optional chaining (?.) and optional variables (?:).
	 */
	private static final EnumSet<ETokenType> NON_TERNARY_QUESTION_FOLLOWERS = EnumSet.of(DOT, COLON);

	/** The type of the created node. */
	private final EShallowEntityType type;

	/** The subtype of the created node. */
	private final String subType;

	/**
	 * Whether TypeScript type signatures may occur within the parsed statement itself. May be true e.g.
	 * for attribute definitions in interfaces etc. If this is false, any type signatures will be
	 * treated like lambdas.
	 */
	private final boolean typeSignaturesMayOccurInStatement;

	/** Flag to control whether we are processing a lambda expression. */
	private final boolean inLambdaExpression;

	/**
	 * Whether the last execution of {@link #tokenStartsSubParse(ETokenType, List, int, Stack, int)}
	 * determined the start of a TypeScript type and therefore parsing should continue in
	 * IN_TYPESCRIPT_TYPE. The flag is consumed (reset) by {@link #getSubParseState()}.
	 */
	private boolean startsType = false;

	/**
	 * Creates a recognizer.
	 *
	 * @param type
	 *            the type of the created node
	 * @param subType
	 *            the subtype of the created node
	 * @param typeSignaturesMayOccurInStatement
	 *            whether TypeScript type signatures may occur within the statement itself
	 * @param inLambdaExpression
	 *            whether the statement is parsed as part of a lambda expression; if so, a comma or
	 *            semicolon ends the statement
	 */
	public JavaScriptSimpleStatementRecognizer(EShallowEntityType type, String subType,
			boolean typeSignaturesMayOccurInStatement, boolean inLambdaExpression) {
		this.type = type;
		this.subType = subType;
		this.typeSignaturesMayOccurInStatement = typeSignaturesMayOccurInStatement;
		this.inLambdaExpression = inLambdaExpression;
	}

	/**
	 * Creates a recognizer that is not in a lambda expression (i.e. <code>inLambdaExpression</code> is
	 * <code>false</code>).
	 */
	public JavaScriptSimpleStatementRecognizer(EShallowEntityType type, String subType,
			boolean typeSignaturesMayOccurInStatement) {
		this(type, subType, typeSignaturesMayOccurInStatement, false);
	}

	/**
	 * Returns IN_TYPESCRIPT_TYPE exactly once after a colon was identified as the start of a type
	 * (resetting the flag as a side effect), and ANY otherwise.
	 */
	@Override
	protected EJavaScriptParserStates getSubParseState() {
		if (startsType) {
			startsType = false;
			return IN_TYPESCRIPT_TYPE;
		}
		return ANY;
	}

	/**
	 * Decides whether the token at the given offset starts a nested construct (named function,
	 * lambda, function expression or TypeScript type) that has to be parsed separately.
	 */
	@Override
	protected boolean tokenStartsSubParse(ETokenType tokenType, List<IToken> tokens, int offset,
			Stack<ETokenType> expectedClosing, int nodeStart) {
		if (JavaScriptShallowParser.ALL_IDENTIFIERS.contains(tokenType)) {
			return startsNamedFunction(tokens, offset) || startsBareLambda(tokens, offset);
		} else if (tokenType == FUNCTION) {
			// foo.function() is a valid statement
			boolean isFunctionCall = offset > 0 && tokens.get(offset - 1).getType() == DOT;
			// can define a function named `function`
			boolean isAttributeFunction = offset + 1 < tokens.size() && tokens.get(offset + 1).getType() == COLON;
			return !isFunctionCall && !isAttributeFunction;
		} else if (JavaScriptShallowParser.PROPERTY_NAME.contains(tokenType) && type == EShallowEntityType.ATTRIBUTE) {
			return startsNamedFunction(tokens, offset);
		} else if (tokenType == LPAREN) {
			return startsParenthesisLambda(tokens, offset);
		} else if (tokenType == COLON) {
			// A colon can either appear in combination with a ternary operator "a ? b : c"
			boolean ternaryAlternative = isTernaryAlternative(tokens, offset, nodeStart);
			// Or within an object literal "{ a: b }"
			boolean isInObject = !expectedClosing.isEmpty() && expectedClosing.peek() == RBRACE;
			// Any other colon is treated as the start of a type; remember this for
			// getSubParseState()
			startsType = !isInObject && !ternaryAlternative;
			return startsType;
		}
		return false;
	}

	/**
	 * Determines whether there is a matching "?" at the same nesting level as the current colon after
	 * statementStart.
	 */
	private static boolean isTernaryAlternative(List<IToken> tokens, int colonOffset, int statementStart) {
		// Nesting depth relative to the colon. As we walk backwards, closing brackets
		// increase the depth and opening brackets decrease it.
		int openBraces = 0;
		for (int currentOffset = colonOffset - 1; currentOffset >= statementStart; currentOffset--) {
			ETokenType currentTokenType = tokens.get(currentOffset).getType();
			if (OPENING_BRACKETS.contains(currentTokenType)) {
				openBraces--;
			} else if (CLOSING_BRACKETS.contains(currentTokenType)) {
				openBraces++;
			}
			// Ignore optional chaining (?.) and optional variables (?:). The look-ahead is
			// safe, as currentOffset + 1 is at most colonOffset.
			if (openBraces == 0 && QUESTION == currentTokenType
					&& !NON_TERNARY_QUESTION_FOLLOWERS.contains(tokens.get(currentOffset + 1).getType())) {
				return true;
			} else if (openBraces < 0) {
				// We left the bracket pair that encloses the colon
				return false;
			}
		}
		return false;
	}

	/**
	 * Returns whether at the given position there is the start of a lambda whose arguments are in
	 * parentheses.
	 */
	private boolean startsParenthesisLambda(List<IToken> tokens, int offset) {
		if (offset > 0) {
			ETokenType lastTokenType = tokens.get(offset - 1).getType();

			if (lastTokenType == ETokenType.IDENTIFIER) {
				// plain old function call
				return false;
			} else if (lastTokenType == ETokenType.GT) {
				int ltIndex = TokenStreamUtils.findMatchingOpeningToken(tokens, offset - 2, ETokenType.LT,
						ETokenType.GT);
				// identifier followed by <...>, i.e. a call with explicit type arguments;
				// ltIndex > 0 suffices to make ltIndex - 1 a valid index
				if (ltIndex > 0 && tokens.get(ltIndex - 1).getType() == IDENTIFIER) {
					return false;
				}
			}
		}

		int closingPosition = TokenStreamUtils.findMatchingClosingToken(tokens, offset + 1, LPAREN, RPAREN);
		if (closingPosition == TokenStreamUtils.NOT_FOUND) {
			return false;
		}

		// This function is the most expensive one, therefore we execute it as late as
		// possible
		int arrowIndex = getArrowIndex(tokens, offset);
		if (arrowIndex == TokenStreamUtils.NOT_FOUND || closingPosition > arrowIndex) {
			return false;
		}

		if (closingPosition + 1 < tokens.size() && tokens.get(closingPosition + 1).getType() == COLON
				&& typeSignaturesMayOccurInStatement) {
			// there is a type signature after the argument brackets, so we need
			// to skip it, e.g: () : any => { ... }
			return true;
		}

		return TokenStreamUtils.hasTokenTypeSequence(tokens, closingPosition + 1, DOUBLE_ARROW);
	}

	/**
	 * Returns the index of the next '=>' in the tokens beyond the offset in this statement, i.e. before
	 * the next semicolon. Skips semicolons occurring in object type definitions. Returns
	 * {@link TokenStreamUtils#NOT_FOUND} if there is no such arrow.
	 */
	private static int getArrowIndex(List<IToken> tokens, int offset) {
		int firstArrowIndex = TokenStreamUtils.firstTokenOfType(tokens, offset, DOUBLE_ARROW);
		if (firstArrowIndex == TokenStreamUtils.NOT_FOUND) {
			return TokenStreamUtils.NOT_FOUND;
		}

		// Note: this index is relative to the sub list, i.e. relative to offset
		int lastSemicolonIndexInParams = TokenStreamUtils.lastTokenOfType(tokens.subList(offset, firstArrowIndex),
				SEMICOLON);

		if (lastSemicolonIndexInParams == TokenStreamUtils.NOT_FOUND
				|| areSemicolaPartOfObjectType(tokens.subList(offset, offset + lastSemicolonIndexInParams + 1))) {
			return firstArrowIndex;
		}

		// The statement is terminated by a semicolon before the arrow
		return TokenStreamUtils.NOT_FOUND;
	}

	/**
	 * Checks whether all semicolons in the list are part of object types, i.e. whether each semicolon
	 * is preceded by more opening than closing braces.
	 */
	private static boolean areSemicolaPartOfObjectType(List<IToken> tokens) {
		int braceDepth = 0;
		for (IToken token : tokens) {
			switch (token.getType()) {
			case LBRACE:
				braceDepth++;
				break;
			case RBRACE:
				braceDepth--;
				break;
			case SEMICOLON:
				if (braceDepth <= 0) {
					return false;
				}
				break;
			default:
				// Ignore other types
				break;
			}
		}
		return true;
	}

	/**
	 * Returns whether at the given position there is the start of a bare lambda, i.e. one without
	 * surrounding parentheses. Requires at least one token after the arrow.
	 */
	private static boolean startsBareLambda(List<IToken> tokens, int offset) {
		return offset + 2 < tokens.size() && tokens.get(offset + 1).getType() == ETokenType.DOUBLE_ARROW;
	}

	/**
	 * Returns whether an identifier or string literal at the given position starts a named function.
	 */
	private static boolean startsNamedFunction(List<IToken> tokens, int offset) {
		return startsOldStyleFunction(tokens, offset) || startsNewStyleFunction(tokens, offset);
	}

	/**
	 * Returns whether this starts a "new-style" function, i.e. direct name + parameters, followed by
	 * function body.
	 */
	private static boolean startsNewStyleFunction(List<IToken> tokens, int offset) {
		if (offset + 1 >= tokens.size() || tokens.get(offset + 1).getType() != ETokenType.LPAREN) {
			return false;
		}

		int closingParen = TokenStreamUtils.findMatchingClosingToken(tokens, offset + 2, LPAREN, ETokenType.RPAREN);
		if (closingParen == TokenStreamUtils.NOT_FOUND || closingParen + 1 >= tokens.size()) {
			return false;
		}

		return tokens.get(closingParen + 1).getType() == ETokenType.LBRACE;
	}

	/**
	 * Returns whether this starts an "old-style" function with explicit colon and function keyword.
	 */
	private static boolean startsOldStyleFunction(List<IToken> tokens, int offset) {
		return offset + 2 < tokens.size() && tokens.get(offset + 1).getType() == ETokenType.COLON
				&& tokens.get(offset + 2).getType() == ETokenType.FUNCTION;
	}

	/**
	 * Decides whether the given token starts a new statement, based on the token itself and the token
	 * preceding it (which may be <code>null</code>).
	 */
	@Override
	protected boolean startsNewStatement(IToken token, IToken lastToken) {
		ETokenType tokenType = token.getType();
		if (tokenType == RBRACE) {
			return true;
		}

		if (lastToken == null) {
			return false;
		}

		if (inLambdaExpression && (tokenType == COMMA || tokenType == SEMICOLON)) {
			return true;
		}

		// same line => no new statement
		if (lastToken.getLineNumber() == token.getLineNumber()) {
			return false;
		}

		ETokenType lastTokenType = lastToken.getType();

		// jump statements always end at a new line
		if (lastTokenType == RETURN || lastTokenType == BREAK || lastTokenType == CONTINUE || lastTokenType == THROW) {
			return true;
		}

		// ++ and -- bind to next line
		if (tokenType == PLUSPLUS || tokenType == MINUSMINUS) {
			return true;
		}

		// continue statement if line ends with '.' or ',' (a trailing comma does not
		// continue attributes)
		if (lastTokenType == DOT || (lastTokenType == COMMA && type != EShallowEntityType.ATTRIBUTE)) {
			return false;
		}

		// continue while we are in a tagged template string
		if (lastTokenType == IDENTIFIER && tokenType == TEMPLATE_LITERAL) {
			return false;
		}

		// continue statement if line ends in operator or next line starts
		// with operator or delimiter
		return lastTokenType.getTokenClass() != ETokenClass.OPERATOR
				&& tokenType.getTokenClass() != ETokenClass.OPERATOR
				&& tokenType.getTokenClass() != ETokenClass.DELIMITER;
	}

	@Override
	protected EShallowEntityType getEntityType() {
		return type;
	}

	@Override
	protected String getEntitySubtypeName() {
		return subType;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy