// eu.cqse.check.framework.shallowparser.framework.ShallowEntity (Maven / Gradle / Ivy)
// Artifact: teamscale-commons
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.cqse.check.framework.shallowparser.framework;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.collections.UnmodifiableList;
import org.conqat.lib.commons.region.LineBasedRegion;
import org.conqat.lib.commons.string.StringUtils;
import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
/**
* An entity resulting from shallow parsing. For classes outside of this
* package, this class is immutable. An exception is made for addChild which can
* be overridden by subclasses.
*
* Entities represent classes, methods, statements, etc.
*
* Parser errors are usually marked by the inclusion of incomplete entities. For
* these entities, the end values (index, offset, etc.) may be wrong or even
* invalid.
*/
public class ShallowEntity implements Serializable {
/** Version for serialization. */
private static final long serialVersionUID = 1L;
/** The type of this entity. */
private final EShallowEntityType type;
/**
* The subtype of this entity. Some well-known values are defined in
* {@link SubTypeNames}.
*/
private final String subtype;
/** The name of the entity (may be null or empty string). */
private @Nullable String name;
/** The parent entity (may be null for root entities). */
private ShallowEntity parent;
/** The list of children. */
private final List children = new ArrayList<>();
/** Whether this node has been completed, i.e. could be fully parsed. */
protected boolean completed = false;
/**
* The underlying list of tokens (as seen by the shallow parser, i.e. without
* comments)
*/
private final List tokens;
/**
* The index of the first token in the token list as seen by the shallow parser
*/
private int startTokenIndex;
/**
* The index of the end token (non-inclusive) in the token list as seen by the
* shallow parser
*/
private int endTokenIndex = -1;
/**
* Marks this node as continued. See {@link #isContinued()} for an explanation.
*/
private boolean continuedNode = false;
/** Constructor. */
public ShallowEntity(EShallowEntityType type, String subtype, String name, List tokens,
int startTokenIndex) {
this.type = type;
this.subtype = subtype;
this.name = name;
this.tokens = tokens;
this.startTokenIndex = startTokenIndex;
}
/** Constructor. */
public ShallowEntity(EShallowEntityType type, String subtype, String name, List tokens, int startTokenIndex,
int endTokenIndex, boolean completed, boolean continuedNode, List children) {
this(type, subtype, name, tokens, startTokenIndex);
this.endTokenIndex = endTokenIndex;
this.completed = completed;
this.continuedNode = continuedNode;
for (ShallowEntity child : children) {
addChild(child);
}
}
/**
* Returns the number of entity nodes including this entity, its children, grand
* children, etc.
*/
public int getEntityCount() {
int result = 1;
for (ShallowEntity child : children) {
result += child.getEntityCount();
}
return result;
}
/**
* Returns the number of entity nodes that are complete, including this entity,
* its children, grand children, etc.
*/
public int getCompleteEntityCount() {
int result = 0;
if (isCompleted()) {
result = 1;
}
for (ShallowEntity child : children) {
result += child.getCompleteEntityCount();
}
return result;
}
/** Returns the type. */
public EShallowEntityType getType() {
return type;
}
/**
* Returns the subtype. Some well-known values are defined in
* {@link SubTypeNames}.
*/
public String getSubtype() {
return subtype;
}
/**
* Returns the name of this entity. The result may be the empty String or
* null
.
*/
public @Nullable String getName() {
return name;
}
/** Sets the name. */
/* package */void setName(String name) {
this.name = name;
}
/** Returns whether this shallow entity has children. */
public boolean hasChildren() {
return !children.isEmpty();
}
/**
* Returns the children. Returns an empty list, if there are no children.
*/
public UnmodifiableList getChildren() {
return CollectionUtils.asUnmodifiable(children);
}
/**
* Adds a child entity. As multiple top-level shallow entities can occur within
* one file, this method can be overridden by subclasses that simulate a root
* node of a shallow entity tree.
*/
public void addChild(ShallowEntity child) {
CCSMAssert.isTrue(child.parent == null, "May not add entity to multiple parents!");
children.add(child);
child.parent = this;
}
/** Returns parent entity or {@code null} for the root. */
public @Nullable ShallowEntity getParent() {
return parent;
}
/**
* Returns a new list containing all children of the given type in order.
*/
public List getChildrenOfType(EShallowEntityType type) {
List result = new ArrayList<>();
for (ShallowEntity child : children) {
if (child.getType() == type) {
result.add(child);
}
}
return result;
}
/**
* Returns whether this node has been completed, i.e. could be fully parsed.
*/
public boolean isCompleted() {
return completed;
}
/**
* Marks this node as completed and stores whether we expect the node to be
* continued. An example for a continued node is an "if" followed by an "else".
* After parsing the "if" and its body, the entity for the "if" is complete, but
* we expect the if statement to logically continue with the else.
*/
/* package */ void setComplete(boolean continuedNode) {
setComplete();
this.continuedNode = continuedNode;
}
/** Marks this node as completed. */
public void setComplete() {
completed = true;
}
/**
* Returns whether this node is continued, which means that the next sibling
* node logically is associated to this one. This feature is used, e.g., to
* connect the if-block and the corresponding else-block, which are parsed into
* separate nodes, but the first (if) node will be marked as continued. Note
* that even if this is true, a next sibling might not exist (typically the
* result of parsing errors).
*/
public boolean isContinued() {
return continuedNode;
}
/** Sets the last (non-inclusive) token index. */
public void setEndTokenIndex(int endTokenIndex) {
this.endTokenIndex = endTokenIndex;
}
/**
* Returns the start token index. This is the index in a token list without
* comments.
*/
public int getStartTokenIndex() {
return startTokenIndex;
}
/**
* Returns the start token index relative to the parent (i.e. this is valid for
* the tokens from {@link #includedTokens()} called for the parent).
*/
public int getRelativeStartTokenIndex() {
if (parent == null) {
return getStartTokenIndex();
}
return getStartTokenIndex() - parent.getStartTokenIndex();
}
/**
* Returns the end token index relative to the parent (i.e. this is valid for
* the tokens from {@link #includedTokens()} called for the parent).
*/
public int getRelativeEndTokenIndex() {
if (parent == null) {
return getEndTokenIndex();
}
return getEndTokenIndex() - parent.getStartTokenIndex();
}
/** Returns the 1-based start line number. */
public int getStartLine() {
return getStartToken().getLineNumber() + 1;
}
/** Returns the line-based region of the entity. */
public LineBasedRegion getLineBasedRegion() {
return new LineBasedRegion(getStartLine(), getEndLine());
}
/** Return start token */
private IToken getStartToken() {
CCSMAssert.isTrue(hasValidStartToken(),
"Start token index '" + getStartTokenIndex() + "' out of bounds for token list of length '"
+ tokens.size() + "' for entity " + getType() + ":" + getSubtype() + ":" + getName());
return tokens.get(getStartTokenIndex());
}
/** Returns whether the entity has a valid start token. */
public boolean hasValidStartToken() {
return getStartTokenIndex() < tokens.size();
}
/** Returns the (inclusive) offset of the start token */
public int getStartOffset() {
return getStartToken().getOffset();
}
/**
* Return (inclusive) offset of end token. This might be not the very last
* token, if parsing errors occurred.
*/
public int getEndOffset() {
IToken endToken = getEndToken();
if (endToken == null) {
return getStartToken().getEndOffset();
}
return endToken.getEndOffset();
}
/**
* Returns the (exclusive) end token index. This is the index in a list of
* tokens without comments. This may not be the correct index, if parsing errors
* occurred.
*/
public int getEndTokenIndex() {
return endTokenIndex;
}
/**
* Returns the 1-based inclusive end line number. This might be not the very
* last line, if parsing errors occurred.
*/
public int getEndLine() {
// we have to calculate +1 to convert to 1-based lines
IToken endToken = getEndToken();
if (endToken == null) {
return getStartToken().getLineNumber() + 1;
}
return endToken.getLineNumber() + 1;
}
/** Return end token (or null if invalid). */
private IToken getEndToken() {
if (getEndTokenIndex() <= 0) {
return null;
}
return tokens.get(getEndTokenIndex() - 1);
}
/**
* Returns a view of the included tokens without comments. This is typically
* non-empty, but may be empty in very special cases, such as parsing files with
* invalid syntax. For such files, this may even be empty although child token
* lists are non-empty.
*/
public UnmodifiableList includedTokens() {
return readOnlyTokenView(getStartTokenIndex(), getEndTokenIndex());
}
/**
* Returns whether this is an empty entity, i.e. does not contain any tokens. An
* entity is empty only in very special cases, such as parsing files with
* invalid syntax.
*/
public boolean isEmpty() {
return getEndTokenIndex() <= getStartTokenIndex();
}
/**
* Returns a read-only view of the tokens from the (inclusive) start index to
* the (exclusive) end index.
*/
private UnmodifiableList readOnlyTokenView(int startIndex, int endIndex) {
endIndex = Math.min(endIndex, tokens.size());
// also check against negative values, to handle incomplete entities
if (endIndex <= startIndex || startIndex < 0) {
return CollectionUtils.emptyList();
}
return CollectionUtils.asUnmodifiable(tokens.subList(startIndex, endIndex));
}
/**
* Returns a view of the tokens (without comments) from the beginning of the
* entity up to the last token of the entity. It does not include the tokens of
* its children but it includes the tokens that may be arise between its
* children entities.
*
* Example:
*
*
* double[] doubles = ints.stream().mapToDouble( //
* i -> i * 5.0 //
* ).filter(Double::isNaN).filter( //
* d -> d > 0 //
* ).toArray();
*
*
* Here, the entity's own tokens are spread across the following three
* subsequences:
*
* double[] doubles = ints.stream().mapToDouble(
* ).filter(Double::isNaN).filter(
* ).toArray();
*
* The returned list may thus contain more than just {@link #ownStartTokens()}
* and {@link #ownEndTokens()}, as lambda expressions may introduce an arbitrary
* number of children.
*/
public List> ownTokens() {
List> ownTokens = new ArrayList<>();
int currentTokensBegin = getStartTokenIndex();
for (ShallowEntity child : children) {
ownTokens.add(readOnlyTokenView(currentTokensBegin, child.getStartTokenIndex()));
currentTokensBegin = child.getEndTokenIndex();
}
UnmodifiableList interval = readOnlyTokenView(currentTokensBegin, getEndTokenIndex());
if (!interval.isEmpty()) {
ownTokens.add(interval);
}
return ownTokens;
}
/**
* Returns a view of the tokens (without comments) from the beginning of the
* entity up to the first token included in the first child. For example for an
* if-block statement, this would include everything from the "if" to the first
* brace (inclusive). The first token of the first child statement would not be
* included. For an entity without children this will return
* {@link #includedTokens()}.
*/
public UnmodifiableList ownStartTokens() {
if (children.isEmpty()) {
return includedTokens();
}
// For 'if' and 'while' shallow entities with lambdas in the condition, the
// lambdas are parsed as children of its block. To determine the start tokens,
// these children are not regarded. The first child always have an LBRACE before
// it.
Set subTypeNames = CollectionUtils.asHashSet(SubTypeNames.IF, SubTypeNames.WHILE);
String subTypeOfFirstChild = children.get(0).getSubtype();
if (subTypeNames.contains(subtype) && (SubTypeNames.LAMBDA.equals(subTypeOfFirstChild)
|| SubTypeNames.LAMBDA_EXPRESSION.equals(subTypeOfFirstChild))) {
for (ShallowEntity child : children) {
int childStartIndex = child.getStartTokenIndex();
if (ETokenType.LBRACE == tokens.get(childStartIndex - 1).getType()) {
return readOnlyTokenView(getStartTokenIndex(), childStartIndex);
}
}
}
return readOnlyTokenView(getStartTokenIndex(), children.get(0).getStartTokenIndex());
}
/**
* For entities with children ({@link #hasChildren()}), this returns a
* view of the tokens (without comments) from the last token included in the
* last child up to the end of the entity. For example for a do-while statement,
* this would include everything from (inclusive) the RBRACE closing the body to
* the end of the "while". The last token of the last child statement would not
* be included. Returns an empty list if called for an entity without
* children.
*/
public UnmodifiableList ownEndTokens() {
if (children.isEmpty()) {
return CollectionUtils.emptyList();
}
// For 'do while' shallow entities with lambdas in the condition, the lambdas
// are parsed as children of its block. To determine the end tokens, these
// children are not regarded. The last child always have an LBRACE after it.
Set subTypeNames = CollectionUtils.asHashSet(SubTypeNames.DO, SubTypeNames.DO_WHILE);
String subTypeOfLastChild = CollectionUtils.getLast(children).getSubtype();
if (subTypeNames.contains(subtype) && (SubTypeNames.LAMBDA.equals(subTypeOfLastChild)
|| SubTypeNames.LAMBDA_EXPRESSION.equals(subTypeOfLastChild))) {
for (ShallowEntity child : CollectionUtils.reverse(children)) {
int childEndIndex = child.getEndTokenIndex();
ETokenType endToken = tokens.get(childEndIndex).getType();
ETokenType oneAfterEndToken = tokens.get(childEndIndex + 1).getType();
if (endToken == ETokenType.RBRACE && oneAfterEndToken == ETokenType.WHILE) {
return readOnlyTokenView(childEndIndex, getEndTokenIndex());
}
}
}
return readOnlyTokenView(CollectionUtils.getLast(children).getEndTokenIndex(), getEndTokenIndex());
}
/**
* {@inheritDoc}
*
* Before changing the output, note that we use this method also for regression
* testing.
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
appendAsIndentedString(sb, 0);
return sb.toString();
}
/**
* Returns a string representation of this node without including its children.
*/
public String toLocalString() {
return toLocalString(getStartLine(), getEndLine());
}
/**
* Returns a string representation of this node without including its children
* using the given line representation.
*/
private String toLocalString(int startLine, int endLine) {
String incomplete = StringUtils.EMPTY_STRING;
if (!isCompleted()) {
incomplete = " [incomplete]";
}
return type + ": " + subtype + ": " + name + " (lines " + startLine + "-" + endLine + ")" + incomplete;
}
/**
* Appends an indented localized string representation of the node and it's
* children to the given string builder.
*/
private void appendAsIndentedString(StringBuilder sb, int indent) {
sb.append(StringUtils.fillString(2 * indent, ' ')).append(toLocalString()).append(StringUtils.LINE_SEPARATOR);
for (ShallowEntity child : children) {
child.appendAsIndentedString(sb, indent + 1);
}
}
/**
* Traverses this entity depth-first. For details of visiting the entities, see
* {@link IShallowEntityVisitor}.
*/
public void traverse(IShallowEntityVisitor visitor) {
if (visitor.visit(this)) {
traverse(children, visitor);
}
visitor.endVisit(this);
}
/**
* Traverses each of the given entities depth-first. For details of visiting the
* entities, see {@link IShallowEntityVisitor}.
*/
public static void traverse(Collection entities, IShallowEntityVisitor visitor) {
for (ShallowEntity entity : entities) {
entity.traverse(visitor);
}
}
/**
* Removes all "contained" tokens (i.e. those for which
* {@link Predicate#test(Object)} returns true) from the underlying token stream
* and adjusts the entities accordingly.
*/
public static void filterTokens(Collection entities, Predicate predicate) {
if (entities.isEmpty()) {
return;
}
List tokens = CollectionUtils.getAny(entities).tokens;
List newTokens = new ArrayList<>();
List indexLookup = new ArrayList<>();
for (IToken token : tokens) {
indexLookup.add(newTokens.size());
if (!predicate.test(token)) {
newTokens.add(token);
}
}
indexLookup.add(newTokens.size());
tokens.clear();
tokens.addAll(newTokens);
traverse(entities, entity -> {
entity.startTokenIndex = indexLookup.get(entity.startTokenIndex);
if (entity.getEndTokenIndex() >= 0) {
entity.endTokenIndex = indexLookup.get(entity.getEndTokenIndex());
}
return true;
});
}
/**
* Clones the shallow entity and all children, but does not clone the tokens,
* because they must be the same for the whole shallow entity tree.
*/
public ShallowEntity deepCloneWithoutCloningTokens(List tokens) {
ShallowEntity clone = new ShallowEntity(this.getType(), this.getSubtype(), this.getName(), tokens,
this.startTokenIndex);
clone.endTokenIndex = this.getEndTokenIndex();
clone.completed = this.completed;
clone.continuedNode = this.continuedNode;
for (ShallowEntity child : this.getChildren()) {
ShallowEntity clonedChild = child.deepCloneWithoutCloningTokens(tokens);
clone.addChild(clonedChild);
clonedChild.parent = clone;
}
return clone;
}
/**
* Recursively removes all completely empty shallow entities, i.e. those without
* own start and end tokens and without tokens in children entities.
*/
public static void collapseEmptyEntities(Collection entities) {
List result = new ArrayList<>();
for (ShallowEntity entity : entities) {
entity.collapseEmptyEntities();
if (entity.isCollapsible()) {
result.addAll(entity.children);
} else {
result.add(entity);
}
}
entities.clear();
entities.addAll(result);
}
/**
* Removes all completely empty shallow entities, i.e. those without own start
* and end tokens, from the tree.
*/
public void collapseEmptyEntities() {
for (ShallowEntity child : children) {
child.collapseEmptyEntities();
}
List newChildren = new ArrayList<>();
for (ShallowEntity child : children) {
if (child.isCollapsible()) {
for (ShallowEntity subChild : child.children) {
newChildren.add(subChild);
subChild.parent = this;
}
} else {
newChildren.add(child);
}
}
children.clear();
children.addAll(newChildren);
}
/**
* Returns whether this is collapsible, i.e. has no own tokens and has no
* children with tokens.
*/
private boolean isCollapsible() {
return readOnlyTokenView(getStartTokenIndex(), getEndTokenIndex()).isEmpty();
}
/**
* The underlying list of all tokens. For the tokens of this entity use
* {@link #ownTokens()}. For the start tokens of this entity use
* {@link #ownStartTokens()}. For the end tokens of this entity use
* {@link #ownEndTokens()}.
*
* @see #tokens
*/
public List getAllTokens() {
return tokens;
}
}