All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.cqse.check.framework.shallowparser.framework.ShallowEntity Maven / Gradle / Ivy

There is a newer version: 2025.1.0-rc2
Show newest version
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package eu.cqse.check.framework.shallowparser.framework;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;

import org.checkerframework.checker.nullness.qual.Nullable;
import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.collections.UnmodifiableList;
import org.conqat.lib.commons.region.LineBasedRegion;
import org.conqat.lib.commons.string.StringUtils;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.SubTypeNames;

/**
 * An entity resulting from shallow parsing. For classes outside of this
 * package, this class is immutable. An exception is made for addChild which can
 * be overridden by subclasses.
 *
 * Entities represent classes, methods, statements, etc.
 *
 * Parser errors are usually marked by the inclusion of incomplete entities. For
 * these entities, the end values (index, offset, etc.) may be wrong or even
 * invalid.
 */
public class ShallowEntity implements Serializable {

	/** Version for serialization. */
	private static final long serialVersionUID = 1L;

	/** The type of this entity. */
	private final EShallowEntityType type;

	/**
	 * The subtype of this entity. Some well-known values are defined in
	 * {@link SubTypeNames}.
	 */
	private final String subtype;

	/** The name of the entity (may be null or empty string). */
	private @Nullable String name;

	/** The parent entity (may be null for root entities). */
	private ShallowEntity parent;

	/** The list of children. */
	private final List children = new ArrayList<>();

	/** Whether this node has been completed, i.e. could be fully parsed. */
	protected boolean completed = false;

	/**
	 * The underlying list of tokens (as seen by the shallow parser, i.e. without
	 * comments)
	 */
	private final List tokens;

	/**
	 * The index of the first token in the token list as seen by the shallow parser
	 */
	private int startTokenIndex;

	/**
	 * The index of the end token (non-inclusive) in the token list as seen by the
	 * shallow parser
	 */
	private int endTokenIndex = -1;

	/**
	 * Marks this node as continued. See {@link #isContinued()} for an explanation.
	 */
	private boolean continuedNode = false;

	/** Constructor. */
	public ShallowEntity(EShallowEntityType type, String subtype, String name, List tokens,
			int startTokenIndex) {
		this.type = type;
		this.subtype = subtype;
		this.name = name;
		this.tokens = tokens;
		this.startTokenIndex = startTokenIndex;
	}

	/** Constructor. */
	public ShallowEntity(EShallowEntityType type, String subtype, String name, List tokens, int startTokenIndex,
			int endTokenIndex, boolean completed, boolean continuedNode, List children) {
		this(type, subtype, name, tokens, startTokenIndex);
		this.endTokenIndex = endTokenIndex;
		this.completed = completed;
		this.continuedNode = continuedNode;

		for (ShallowEntity child : children) {
			addChild(child);
		}
	}

	/**
	 * Returns the number of entity nodes including this entity, its children, grand
	 * children, etc.
	 */
	public int getEntityCount() {
		int result = 1;
		for (ShallowEntity child : children) {
			result += child.getEntityCount();
		}
		return result;
	}

	/**
	 * Returns the number of entity nodes that are complete, including this entity,
	 * its children, grand children, etc.
	 */
	public int getCompleteEntityCount() {
		int result = 0;
		if (isCompleted()) {
			result = 1;
		}
		for (ShallowEntity child : children) {
			result += child.getCompleteEntityCount();
		}
		return result;
	}

	/** Returns the type. */
	public EShallowEntityType getType() {
		return type;
	}

	/**
	 * Returns the subtype. Some well-known values are defined in
	 * {@link SubTypeNames}.
	 */
	public String getSubtype() {
		return subtype;
	}

	/**
	 * Returns the name of this entity. The result may be the empty String or
	 * null.
	 */
	public @Nullable String getName() {
		return name;
	}

	/** Sets the name. */
	/* package */void setName(String name) {
		this.name = name;
	}

	/** Returns whether this shallow entity has children. */
	public boolean hasChildren() {
		return !children.isEmpty();
	}

	/**
	 * Returns the children. Returns an empty list, if there are no children.
	 */
	public UnmodifiableList getChildren() {
		return CollectionUtils.asUnmodifiable(children);
	}

	/**
	 * Adds a child entity. As multiple top-level shallow entities can occur within
	 * one file, this method can be overridden by subclasses that simulate a root
	 * node of a shallow entity tree.
	 */
	public void addChild(ShallowEntity child) {
		CCSMAssert.isTrue(child.parent == null, "May not add entity to multiple parents!");
		children.add(child);
		child.parent = this;
	}

	/** Returns parent entity or {@code null} for the root. */
	public @Nullable ShallowEntity getParent() {
		return parent;
	}

	/**
	 * Returns a new list containing all children of the given type in order.
	 */
	public List getChildrenOfType(EShallowEntityType type) {
		List result = new ArrayList<>();
		for (ShallowEntity child : children) {
			if (child.getType() == type) {
				result.add(child);
			}
		}
		return result;
	}

	/**
	 * Returns whether this node has been completed, i.e. could be fully parsed.
	 */
	public boolean isCompleted() {
		return completed;
	}

	/**
	 * Marks this node as completed and stores whether we expect the node to be
	 * continued. An example for a continued node is an "if" followed by an "else".
	 * After parsing the "if" and its body, the entity for the "if" is complete, but
	 * we expect the if statement to logically continue with the else.
	 */
	/* package */ void setComplete(boolean continuedNode) {
		setComplete();
		this.continuedNode = continuedNode;
	}

	/** Marks this node as completed. */
	public void setComplete() {
		completed = true;
	}

	/**
	 * Returns whether this node is continued, which means that the next sibling
	 * node logically is associated to this one. This feature is used, e.g., to
	 * connect the if-block and the corresponding else-block, which are parsed into
	 * separate nodes, but the first (if) node will be marked as continued. Note
	 * that even if this is true, a next sibling might not exist (typically the
	 * result of parsing errors).
	 */
	public boolean isContinued() {
		return continuedNode;
	}

	/** Sets the last (non-inclusive) token index. */
	public void setEndTokenIndex(int endTokenIndex) {
		this.endTokenIndex = endTokenIndex;
	}

	/**
	 * Returns the start token index. This is the index in a token list without
	 * comments.
	 */
	public int getStartTokenIndex() {
		return startTokenIndex;
	}

	/**
	 * Returns the start token index relative to the parent (i.e. this is valid for
	 * the tokens from {@link #includedTokens()} called for the parent).
	 */
	public int getRelativeStartTokenIndex() {
		if (parent == null) {
			return getStartTokenIndex();
		}
		return getStartTokenIndex() - parent.getStartTokenIndex();
	}

	/**
	 * Returns the end token index relative to the parent (i.e. this is valid for
	 * the tokens from {@link #includedTokens()} called for the parent).
	 */
	public int getRelativeEndTokenIndex() {
		if (parent == null) {
			return getEndTokenIndex();
		}
		return getEndTokenIndex() - parent.getStartTokenIndex();
	}

	/** Returns the 1-based start line number. */
	public int getStartLine() {
		return getStartToken().getLineNumber() + 1;
	}

	/** Returns the line-based region of the entity. */
	public LineBasedRegion getLineBasedRegion() {
		return new LineBasedRegion(getStartLine(), getEndLine());
	}

	/** Return start token */
	private IToken getStartToken() {
		CCSMAssert.isTrue(hasValidStartToken(),
				"Start token index '" + getStartTokenIndex() + "' out of bounds for token list of length '"
						+ tokens.size() + "' for entity " + getType() + ":" + getSubtype() + ":" + getName());
		return tokens.get(getStartTokenIndex());
	}

	/** Returns whether the entity has a valid start token. */
	public boolean hasValidStartToken() {
		return getStartTokenIndex() < tokens.size();
	}

	/** Returns the (inclusive) offset of the start token */
	public int getStartOffset() {
		return getStartToken().getOffset();
	}

	/**
	 * Return (inclusive) offset of end token. This might be not the very last
	 * token, if parsing errors occurred.
	 */
	public int getEndOffset() {
		IToken endToken = getEndToken();
		if (endToken == null) {
			return getStartToken().getEndOffset();
		}
		return endToken.getEndOffset();
	}

	/**
	 * Returns the (exclusive) end token index. This is the index in a list of
	 * tokens without comments. This may not be the correct index, if parsing errors
	 * occurred.
	 */
	public int getEndTokenIndex() {
		return endTokenIndex;
	}

	/**
	 * Returns the 1-based inclusive end line number. This might be not the very
	 * last line, if parsing errors occurred.
	 */
	public int getEndLine() {
		// we have to calculate +1 to convert to 1-based lines
		IToken endToken = getEndToken();
		if (endToken == null) {
			return getStartToken().getLineNumber() + 1;
		}
		return endToken.getLineNumber() + 1;
	}

	/** Return end token (or null if invalid). */
	private IToken getEndToken() {
		if (getEndTokenIndex() <= 0) {
			return null;
		}
		return tokens.get(getEndTokenIndex() - 1);
	}

	/**
	 * Returns a view of the included tokens without comments. This is typically
	 * non-empty, but may be empty in very special cases, such as parsing files with
	 * invalid syntax. For such files, this may even be empty although child token
	 * lists are non-empty.
	 */
	public UnmodifiableList includedTokens() {
		return readOnlyTokenView(getStartTokenIndex(), getEndTokenIndex());
	}

	/**
	 * Returns whether this is an empty entity, i.e. does not contain any tokens. An
	 * entity is empty only in very special cases, such as parsing files with
	 * invalid syntax.
	 */
	public boolean isEmpty() {
		return getEndTokenIndex() <= getStartTokenIndex();
	}

	/**
	 * Returns a read-only view of the tokens from the (inclusive) start index to
	 * the (exclusive) end index.
	 */
	private UnmodifiableList readOnlyTokenView(int startIndex, int endIndex) {
		endIndex = Math.min(endIndex, tokens.size());

		// also check against negative values, to handle incomplete entities
		if (endIndex <= startIndex || startIndex < 0) {
			return CollectionUtils.emptyList();
		}
		return CollectionUtils.asUnmodifiable(tokens.subList(startIndex, endIndex));
	}

	/**
	 * Returns a view of the tokens (without comments) from the beginning of the
	 * entity up to the last token of the entity. It does not include the tokens of
	 * its children but it includes the tokens that may be arise between its
	 * children entities.
	 * 

* Example: * *

	 * double[] doubles = ints.stream().mapToDouble( //
	 * 		i -> i * 5.0 //
	 * ).filter(Double::isNaN).filter( //
	 * 		d -> d > 0 //
	 * ).toArray();
	 * 
* * Here, the entity's own tokens are spread across the following three * subsequences: *
    *
  1. double[] doubles = ints.stream().mapToDouble(
  2. *
  3. ).filter(Double::isNaN).filter(
  4. *
  5. ).toArray();
  6. *
* The returned list may thus contain more than just {@link #ownStartTokens()} * and {@link #ownEndTokens()}, as lambda expressions may introduce an arbitrary * number of children. */ public List> ownTokens() { List> ownTokens = new ArrayList<>(); int currentTokensBegin = getStartTokenIndex(); for (ShallowEntity child : children) { ownTokens.add(readOnlyTokenView(currentTokensBegin, child.getStartTokenIndex())); currentTokensBegin = child.getEndTokenIndex(); } UnmodifiableList interval = readOnlyTokenView(currentTokensBegin, getEndTokenIndex()); if (!interval.isEmpty()) { ownTokens.add(interval); } return ownTokens; } /** * Returns a view of the tokens (without comments) from the beginning of the * entity up to the first token included in the first child. For example for an * if-block statement, this would include everything from the "if" to the first * brace (inclusive). The first token of the first child statement would not be * included. For an entity without children this will return * {@link #includedTokens()}. */ public UnmodifiableList ownStartTokens() { if (children.isEmpty()) { return includedTokens(); } // For 'if' and 'while' shallow entities with lambdas in the condition, the // lambdas are parsed as children of its block. To determine the start tokens, // these children are not regarded. The first child always have an LBRACE before // it. Set subTypeNames = CollectionUtils.asHashSet(SubTypeNames.IF, SubTypeNames.WHILE); String subTypeOfFirstChild = children.get(0).getSubtype(); if (subTypeNames.contains(subtype) && (SubTypeNames.LAMBDA.equals(subTypeOfFirstChild) || SubTypeNames.LAMBDA_EXPRESSION.equals(subTypeOfFirstChild))) { for (ShallowEntity child : children) { int childStartIndex = child.getStartTokenIndex(); if (ETokenType.LBRACE == tokens.get(childStartIndex - 1).getType()) { return readOnlyTokenView(getStartTokenIndex(), childStartIndex); } } } return readOnlyTokenView(getStartTokenIndex(), children.get(0).getStartTokenIndex()); } /** * For entities with children ({@link #hasChildren()}), this returns a * view of the tokens (without comments) from the last token included in the * last child up to the end of the entity. For example for a do-while statement, * this would include everything from (inclusive) the RBRACE closing the body to * the end of the "while". The last token of the last child statement would not * be included. Returns an empty list if called for an entity without * children. */ public UnmodifiableList ownEndTokens() { if (children.isEmpty()) { return CollectionUtils.emptyList(); } // For 'do while' shallow entities with lambdas in the condition, the lambdas // are parsed as children of its block. To determine the end tokens, these // children are not regarded. The last child always have an LBRACE after it. Set subTypeNames = CollectionUtils.asHashSet(SubTypeNames.DO, SubTypeNames.DO_WHILE); String subTypeOfLastChild = CollectionUtils.getLast(children).getSubtype(); if (subTypeNames.contains(subtype) && (SubTypeNames.LAMBDA.equals(subTypeOfLastChild) || SubTypeNames.LAMBDA_EXPRESSION.equals(subTypeOfLastChild))) { for (ShallowEntity child : CollectionUtils.reverse(children)) { int childEndIndex = child.getEndTokenIndex(); ETokenType endToken = tokens.get(childEndIndex).getType(); ETokenType oneAfterEndToken = tokens.get(childEndIndex + 1).getType(); if (endToken == ETokenType.RBRACE && oneAfterEndToken == ETokenType.WHILE) { return readOnlyTokenView(childEndIndex, getEndTokenIndex()); } } } return readOnlyTokenView(CollectionUtils.getLast(children).getEndTokenIndex(), getEndTokenIndex()); } /** * {@inheritDoc} *

* Before changing the output, note that we use this method also for regression * testing. */ @Override public String toString() { StringBuilder sb = new StringBuilder(); appendAsIndentedString(sb, 0); return sb.toString(); } /** * Returns a string representation of this node without including its children. */ public String toLocalString() { return toLocalString(getStartLine(), getEndLine()); } /** * Returns a string representation of this node without including its children * using the given line representation. */ private String toLocalString(int startLine, int endLine) { String incomplete = StringUtils.EMPTY_STRING; if (!isCompleted()) { incomplete = " [incomplete]"; } return type + ": " + subtype + ": " + name + " (lines " + startLine + "-" + endLine + ")" + incomplete; } /** * Appends an indented localized string representation of the node and it's * children to the given string builder. */ private void appendAsIndentedString(StringBuilder sb, int indent) { sb.append(StringUtils.fillString(2 * indent, ' ')).append(toLocalString()).append(StringUtils.LINE_SEPARATOR); for (ShallowEntity child : children) { child.appendAsIndentedString(sb, indent + 1); } } /** * Traverses this entity depth-first. For details of visiting the entities, see * {@link IShallowEntityVisitor}. */ public void traverse(IShallowEntityVisitor visitor) { if (visitor.visit(this)) { traverse(children, visitor); } visitor.endVisit(this); } /** * Traverses each of the given entities depth-first. For details of visiting the * entities, see {@link IShallowEntityVisitor}. */ public static void traverse(Collection entities, IShallowEntityVisitor visitor) { for (ShallowEntity entity : entities) { entity.traverse(visitor); } } /** * Removes all "contained" tokens (i.e. those for which * {@link Predicate#test(Object)} returns true) from the underlying token stream * and adjusts the entities accordingly. */ public static void filterTokens(Collection entities, Predicate predicate) { if (entities.isEmpty()) { return; } List tokens = CollectionUtils.getAny(entities).tokens; List newTokens = new ArrayList<>(); List indexLookup = new ArrayList<>(); for (IToken token : tokens) { indexLookup.add(newTokens.size()); if (!predicate.test(token)) { newTokens.add(token); } } indexLookup.add(newTokens.size()); tokens.clear(); tokens.addAll(newTokens); traverse(entities, entity -> { entity.startTokenIndex = indexLookup.get(entity.startTokenIndex); if (entity.getEndTokenIndex() >= 0) { entity.endTokenIndex = indexLookup.get(entity.getEndTokenIndex()); } return true; }); } /** * Clones the shallow entity and all children, but does not clone the tokens, * because they must be the same for the whole shallow entity tree. */ public ShallowEntity deepCloneWithoutCloningTokens(List tokens) { ShallowEntity clone = new ShallowEntity(this.getType(), this.getSubtype(), this.getName(), tokens, this.startTokenIndex); clone.endTokenIndex = this.getEndTokenIndex(); clone.completed = this.completed; clone.continuedNode = this.continuedNode; for (ShallowEntity child : this.getChildren()) { ShallowEntity clonedChild = child.deepCloneWithoutCloningTokens(tokens); clone.addChild(clonedChild); clonedChild.parent = clone; } return clone; } /** * Recursively removes all completely empty shallow entities, i.e. those without * own start and end tokens and without tokens in children entities. */ public static void collapseEmptyEntities(Collection entities) { List result = new ArrayList<>(); for (ShallowEntity entity : entities) { entity.collapseEmptyEntities(); if (entity.isCollapsible()) { result.addAll(entity.children); } else { result.add(entity); } } entities.clear(); entities.addAll(result); } /** * Removes all completely empty shallow entities, i.e. those without own start * and end tokens, from the tree. */ public void collapseEmptyEntities() { for (ShallowEntity child : children) { child.collapseEmptyEntities(); } List newChildren = new ArrayList<>(); for (ShallowEntity child : children) { if (child.isCollapsible()) { for (ShallowEntity subChild : child.children) { newChildren.add(subChild); subChild.parent = this; } } else { newChildren.add(child); } } children.clear(); children.addAll(newChildren); } /** * Returns whether this is collapsible, i.e. has no own tokens and has no * children with tokens. */ private boolean isCollapsible() { return readOnlyTokenView(getStartTokenIndex(), getEndTokenIndex()).isEmpty(); } /** * The underlying list of all tokens. For the tokens of this entity use * {@link #ownTokens()}. For the start tokens of this entity use * {@link #ownStartTokens()}. For the end tokens of this entity use * {@link #ownEndTokens()}. * * @see #tokens */ public List getAllTokens() { return tokens; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy