/*
* Copyright (c) 2012 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD-3-Clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.runtime.tree.pattern;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.ListTokenSource;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserInterpreter;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.misc.MultiMap;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.RuleNode;
import org.antlr.v4.runtime.tree.TerminalNode;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* A tree pattern matching mechanism for ANTLR {@link ParseTree}s.
*
* Patterns are strings of source input text with special tags representing
* token or rule references such as:
*
* {@code <ID> = <expr>;}
*
* Given a pattern start rule such as {@code statement}, this object constructs
* a {@link ParseTree} with placeholders for the {@code ID} and {@code expr}
* subtree. Then the {@link #match} routines can compare an actual
* {@link ParseTree} from a parse with this pattern. Tag {@code <ID>} matches
* any {@code ID} token and tag {@code <expr>} references the result of the
* {@code expr} rule (generally an instance of {@code ExprContext}).
*
* The pattern {@code x = 0;} is similar, except that it requires the
* identifier to be {@code x} and the expression to be {@code 0}.
*
* The {@link #matches} routines return {@code true} or {@code false} based
* upon a match for the tree rooted at the parameter sent in. The
* {@link #match} routines return a {@link ParseTreeMatch} object that
* contains the parse tree, the parse tree pattern, and a map from tag name to
* matched nodes (more below). A subtree that fails to match returns with
* {@link ParseTreeMatch#mismatchedNode} set to the first tree node that did not
* match.
*
* For efficiency, you can compile a tree pattern in string form to a
* {@link ParseTreePattern} object.
*
* See {@code TestParseTreeMatcher} for lots of examples.
* {@link ParseTreePattern} has two static helper methods:
* {@link ParseTreePattern#findAll} and {@link ParseTreePattern#match} that
* are easy to use but not super efficient because they create new
* {@link ParseTreePatternMatcher} objects each time and have to compile the
* pattern in string form before using it.
*
* The lexer and parser that you pass into the {@link ParseTreePatternMatcher}
* constructor are used to parse the pattern in string form. The lexer converts
* the {@code <ID> = <expr>;} into a sequence of four tokens (assuming the lexer
* throws out whitespace or puts it on a hidden channel). Be aware that the
* input stream is reset for the lexer (but not the parser; a
* {@link ParserInterpreter} is created to parse the input). Any user-defined
* fields you have put into the lexer might get changed when this mechanism asks
* it to scan the pattern string.
*
* Normally a parser does not accept token {@code <expr>} as a valid
* {@code expr} but, from the parser passed in, we create a special version of
* the underlying grammar representation (an {@link ATN}) that allows imaginary
* tokens representing rules ({@code <expr>}) to match entire rules. We call
* these bypass alternatives.
*
* Delimiters are {@code <} and {@code >}, with {@code \} as the escape string
* by default, but you can set them to whatever you want using
* {@link #setDelimiters}. You must escape both start and stop strings
* {@code \<} and {@code \>}.
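*
* As a usage sketch (names like {@code ExprLexer}, {@code ExprParser}, the
* {@code statement} rule, and {@code someStatementTree} are assumptions, not
* part of this runtime):
*
* <pre>{@code
* ParseTreePatternMatcher matcher =
*     new ParseTreePatternMatcher(new ExprLexer(null), parser); // parser: an ExprParser
* ParseTreePattern pattern =
*     matcher.compile("<ID> = <expr>;", parser.getRuleIndex("statement"));
* ParseTreeMatch m = matcher.match(someStatementTree, pattern);
* if ( m.succeeded() ) {
*     ParseTree id = m.get("ID"); // node bound to the <ID> tag
* }
* }</pre>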
*/
public class ParseTreePatternMatcher {
public static class CannotInvokeStartRule extends RuntimeException {
public CannotInvokeStartRule(Throwable e) {
super(e);
}
}
// Fixes https://github.com/antlr/antlr4/issues/413
// "Tree pattern compilation doesn't check for a complete parse"
public static class StartRuleDoesNotConsumeFullPattern extends RuntimeException {
}
/**
* This is the backing field for {@link #getLexer()}.
*/
private final Lexer lexer;
/**
* This is the backing field for {@link #getParser()}.
*/
private final Parser parser;
protected String start = "<";
protected String stop = ">";
protected String escape = "\\"; // e.g., \< and \> must escape BOTH!
/**
* Constructs a {@link ParseTreePatternMatcher} from a {@link Lexer} and
* {@link Parser} object. The lexer input stream is altered for tokenizing
* the tree patterns. The parser is used as a convenient mechanism to get
* the grammar name, plus the token and rule names.
*/
public ParseTreePatternMatcher(Lexer lexer, Parser parser) {
this.lexer = lexer;
this.parser = parser;
}
/**
* Set the delimiters used for marking rule and token tags within concrete
* syntax used by the tree pattern parser.
*
* @param start The start delimiter.
* @param stop The stop delimiter.
* @param escapeLeft The escape sequence to use for escaping a start or stop delimiter.
*
* @exception IllegalArgumentException if {@code start} is {@code null} or empty.
* @exception IllegalArgumentException if {@code stop} is {@code null} or empty.
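*
* For example (an illustrative sketch; {@code matcher} is any instance of this class):
*
* <pre>{@code
* // use << and >> as tag delimiters and $ as the escape string
* matcher.setDelimiters("<<", ">>", "$");
* }</pre>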
*/
public void setDelimiters(String start, String stop, String escapeLeft) {
if (start == null || start.isEmpty()) {
throw new IllegalArgumentException("start cannot be null or empty");
}
if (stop == null || stop.isEmpty()) {
throw new IllegalArgumentException("stop cannot be null or empty");
}
this.start = start;
this.stop = stop;
this.escape = escapeLeft;
}
/** Does {@code pattern}, matched as rule {@code patternRuleIndex}, match {@code tree}? */
public boolean matches(ParseTree tree, String pattern, int patternRuleIndex) {
ParseTreePattern p = compile(pattern, patternRuleIndex);
return matches(tree, p);
}
/** Does {@code pattern}, matched as rule {@code patternRuleIndex}, match {@code tree}? Pass in a
* compiled pattern instead of a string representation of a tree pattern.
*/
public boolean matches(ParseTree tree, ParseTreePattern pattern) {
MultiMap<String, ParseTree> labels = new MultiMap<String, ParseTree>();
ParseTree mismatchedNode = matchImpl(tree, pattern.getPatternTree(), labels);
return mismatchedNode == null;
}
/**
* Compare {@code pattern} matched as rule {@code patternRuleIndex} against
* {@code tree} and return a {@link ParseTreeMatch} object that contains the
* matched elements, or the node at which the match failed.
*/
public ParseTreeMatch match(ParseTree tree, String pattern, int patternRuleIndex) {
ParseTreePattern p = compile(pattern, patternRuleIndex);
return match(tree, p);
}
/**
* Compare {@code pattern} matched against {@code tree} and return a
* {@link ParseTreeMatch} object that contains the matched elements, or the
* node at which the match failed. Pass in a compiled pattern instead of a
* string representation of a tree pattern.
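*
* A minimal sketch, assuming {@code p} was compiled by {@link #compile} from
* the pattern {@code <ID> = <expr>;} for a hypothetical {@code statement} rule:
*
* <pre>{@code
* ParseTreeMatch m = matcher.match(tree, p);
* if ( m.succeeded() ) {
*     ParseTree idNode   = m.get("ID");   // node bound to <ID>
*     ParseTree exprNode = m.get("expr"); // subtree bound to <expr>
* }
* else {
*     ParseTree bad = m.getMismatchedNode(); // first node that failed to match
* }
* }</pre>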
*/
@NotNull
public ParseTreeMatch match(@NotNull ParseTree tree, @NotNull ParseTreePattern pattern) {
MultiMap<String, ParseTree> labels = new MultiMap<String, ParseTree>();
ParseTree mismatchedNode = matchImpl(tree, pattern.getPatternTree(), labels);
return new ParseTreeMatch(tree, pattern, labels, mismatchedNode);
}
/**
* For repeated use of a tree pattern, compile it to a
* {@link ParseTreePattern} using this method.
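*
* For example (a sketch assuming a grammar with a {@code statement} rule and a
* hypothetical collection {@code candidateTrees}):
*
* <pre>{@code
* ParseTreePattern p = matcher.compile("<ID> = <expr>;", parser.getRuleIndex("statement"));
* int hits = 0;
* for (ParseTree t : candidateTrees) {
*     if ( matcher.matches(t, p) ) hits++; // reuse the compiled pattern each time
* }
* }</pre>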
*/
public ParseTreePattern compile(String pattern, int patternRuleIndex) {
List<? extends Token> tokenList = tokenize(pattern);
ListTokenSource tokenSrc = new ListTokenSource(tokenList);
CommonTokenStream tokens = new CommonTokenStream(tokenSrc);
ParserInterpreter parserInterp = new ParserInterpreter(parser.getGrammarFileName(),
parser.getVocabulary(),
Arrays.asList(parser.getRuleNames()),
parser.getATNWithBypassAlts(),
tokens);
ParseTree tree = null;
try {
parserInterp.setErrorHandler(new BailErrorStrategy());
tree = parserInterp.parse(patternRuleIndex);
// System.out.println("pattern tree = "+tree.toStringTree(parserInterp));
}
catch (ParseCancellationException e) {
throw (RecognitionException)e.getCause();
}
catch (RecognitionException re) {
throw re;
}
catch (Exception e) {
throw new CannotInvokeStartRule(e);
}
// Make sure tree pattern compilation checks for a complete parse
if ( tokens.LA(1)!=Token.EOF ) {
throw new StartRuleDoesNotConsumeFullPattern();
}
return new ParseTreePattern(this, pattern, patternRuleIndex, tree);
}
/**
* Used to convert the tree pattern string into a series of tokens. The
* input stream is reset.
*/
@NotNull
public Lexer getLexer() {
return lexer;
}
/**
* Used to collect the grammar file name, token names, and rule names used to
* parse the pattern into a parse tree.
*/
@NotNull
public Parser getParser() {
return parser;
}
// ---- SUPPORT CODE ----
/**
* Recursively walk {@code tree} against {@code patternTree}, filling
* {@code match.}{@link ParseTreeMatch#labels labels}.
*
* @return the first node encountered in {@code tree} which does not match
* a corresponding node in {@code patternTree}, or {@code null} if the match
* was successful. The specific node returned depends on the matching
* algorithm used by the implementation, and may be overridden.
*/
@Nullable
protected ParseTree matchImpl(@NotNull ParseTree tree,
@NotNull ParseTree patternTree,
@NotNull MultiMap<String, ParseTree> labels)
{
if (tree == null) {
throw new IllegalArgumentException("tree cannot be null");
}
if (patternTree == null) {
throw new IllegalArgumentException("patternTree cannot be null");
}
// x and <ID>, x and y, or x and x; or could be mismatched types
if ( tree instanceof TerminalNode && patternTree instanceof TerminalNode ) {
TerminalNode t1 = (TerminalNode)tree;
TerminalNode t2 = (TerminalNode)patternTree;
ParseTree mismatchedNode = null;
// both are tokens and they have same type
if ( t1.getSymbol().getType() == t2.getSymbol().getType() ) {
if ( t2.getSymbol() instanceof TokenTagToken ) { // x and <ID>
TokenTagToken tokenTagToken = (TokenTagToken)t2.getSymbol();
// track label->list-of-nodes for both token name and label (if any)
labels.map(tokenTagToken.getTokenName(), tree);
if ( tokenTagToken.getLabel()!=null ) {
labels.map(tokenTagToken.getLabel(), tree);
}
}
else if ( t1.getText().equals(t2.getText()) ) {
// x and x
}
else {
// x and y
if (mismatchedNode == null) {
mismatchedNode = t1;
}
}
}
else {
if (mismatchedNode == null) {
mismatchedNode = t1;
}
}
return mismatchedNode;
}
if ( tree instanceof ParserRuleContext && patternTree instanceof ParserRuleContext ) {
ParserRuleContext r1 = (ParserRuleContext)tree;
ParserRuleContext r2 = (ParserRuleContext)patternTree;
ParseTree mismatchedNode = null;
// (expr ...) and <expr>
RuleTagToken ruleTagToken = getRuleTagToken(r2);
if ( ruleTagToken!=null ) {
ParseTreeMatch m = null;
if ( r1.getRuleContext().getRuleIndex() == r2.getRuleContext().getRuleIndex() ) {
// track label->list-of-nodes for both rule name and label (if any)
labels.map(ruleTagToken.getRuleName(), tree);
if ( ruleTagToken.getLabel()!=null ) {
labels.map(ruleTagToken.getLabel(), tree);
}
}
else {
if (mismatchedNode == null) {
mismatchedNode = r1;
}
}
return mismatchedNode;
}
// (expr ...) and (expr ...)
if ( r1.getChildCount()!=r2.getChildCount() ) {
if (mismatchedNode == null) {
mismatchedNode = r1;
}
return mismatchedNode;
}
int n = r1.getChildCount();
		for (int i = 0; i < n; i++) {
			// recursively match each child pair; first failure wins
			ParseTree childMatch = matchImpl(r1.getChild(i), r2.getChild(i), labels);
			if ( childMatch != null ) {
				return childMatch;
			}
		}
		return mismatchedNode;
	}
	// if nodes aren't both tokens or both rule contexts, they can't match
	return tree;
}
/** Is {@code t} an {@code (expr <expr>)} subtree? */
protected RuleTagToken getRuleTagToken(ParseTree t) {
if ( t instanceof RuleNode ) {
RuleNode r = (RuleNode)t;
if ( r.getChildCount()==1 && r.getChild(0) instanceof TerminalNode ) {
TerminalNode c = (TerminalNode)r.getChild(0);
if ( c.getSymbol() instanceof RuleTagToken ) {
// System.out.println("rule tag subtree "+t.toStringTree(parser));
return (RuleTagToken)c.getSymbol();
}
}
}
return null;
}
public List<? extends Token> tokenize(String pattern) {
// split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
List<Chunk> chunks = split(pattern);
// create token stream from text and tags
List<Token> tokens = new ArrayList<Token>();
for (Chunk chunk : chunks) {
if ( chunk instanceof TagChunk ) {
TagChunk tagChunk = (TagChunk)chunk;
// add special rule token or conjure up new token from name
if ( Character.isUpperCase(tagChunk.getTag().charAt(0)) ) {
Integer ttype = parser.getTokenType(tagChunk.getTag());
if ( ttype==Token.INVALID_TYPE ) {
throw new IllegalArgumentException("Unknown token "+tagChunk.getTag()+" in pattern: "+pattern);
}
TokenTagToken t = new TokenTagToken(tagChunk.getTag(), ttype, tagChunk.getLabel());
tokens.add(t);
}
else if ( Character.isLowerCase(tagChunk.getTag().charAt(0)) ) {
int ruleIndex = parser.getRuleIndex(tagChunk.getTag());
if ( ruleIndex==-1 ) {
throw new IllegalArgumentException("Unknown rule "+tagChunk.getTag()+" in pattern: "+pattern);
}
int ruleImaginaryTokenType = parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex];
tokens.add(new RuleTagToken(tagChunk.getTag(), ruleImaginaryTokenType, tagChunk.getLabel()));
}
else {
throw new IllegalArgumentException("invalid tag: "+tagChunk.getTag()+" in pattern: "+pattern);
}
}
else {
TextChunk textChunk = (TextChunk)chunk;
lexer.setInputStream(CharStreams.fromString(textChunk.getText()));
Token t = lexer.nextToken();
while ( t.getType()!=Token.EOF ) {
tokens.add(t);
t = lexer.nextToken();
}
}
}
// System.out.println("tokens="+tokens);
return tokens;
}
/** Split {@code <ID> = <expr>;} into 4 chunks for tokenizing by {@link #tokenize}. */
public List<Chunk> split(String pattern) {
int p = 0;
int n = pattern.length();
List<Chunk> chunks = new ArrayList<Chunk>();
StringBuilder buf = new StringBuilder();
// find all start and stop indexes first, then collect
	List<Integer> starts = new ArrayList<Integer>();
	List<Integer> stops = new ArrayList<Integer>();
	while ( p < n ) {
		// skip escaped delimiters, record positions of real ones
		if ( p == pattern.indexOf(escape+start, p) ) {
			p += escape.length() + start.length();
		}
		else if ( p == pattern.indexOf(escape+stop, p) ) {
			p += escape.length() + stop.length();
		}
		else if ( p == pattern.indexOf(start, p) ) {
			starts.add(p);
			p += start.length();
		}
		else if ( p == pattern.indexOf(stop, p) ) {
			stops.add(p);
			p += stop.length();
		}
		else {
			p++;
		}
	}
	if ( starts.size() > stops.size() ) {
throw new IllegalArgumentException("unterminated tag in pattern: "+pattern);
}
if ( starts.size() < stops.size() ) {
throw new IllegalArgumentException("missing start tag in pattern: "+pattern);
}
int ntags = starts.size();
	for (int i=0; i<ntags; i++) {
		if ( starts.get(i)>=stops.get(i) ) {
throw new IllegalArgumentException("tag delimiters out of order in pattern: "+pattern);
}
}
// collect into chunks now
if ( ntags==0 ) {
String text = pattern.substring(0, n);
chunks.add(new TextChunk(text));
}
if ( ntags>0 && starts.get(0)>0 ) { // copy text up to first tag into chunks
String text = pattern.substring(0, starts.get(0));
chunks.add(new TextChunk(text));
}
	for (int i=0; i<ntags; i++) {
		// copy inside of <tag>
String tag = pattern.substring(starts.get(i) + start.length(), stops.get(i));
String ruleOrToken = tag;
String label = null;
int colon = tag.indexOf(':');
if ( colon >= 0 ) {
label = tag.substring(0,colon);
ruleOrToken = tag.substring(colon+1, tag.length());
}
chunks.add(new TagChunk(label, ruleOrToken));
if ( i+1 < ntags ) {
// copy from end of <tag> to start of next tag
String text = pattern.substring(stops.get(i) + stop.length(), starts.get(i + 1));
chunks.add(new TextChunk(text));
}
}
if ( ntags>0 ) {
int afterLastTag = stops.get(ntags - 1) + stop.length();
if ( afterLastTag < n ) { // copy text from end of last tag to end
String text = pattern.substring(afterLastTag, n);
chunks.add(new TextChunk(text));
}
}
// strip out the escape sequences from text chunks but not tags
for (int i = 0; i < chunks.size(); i++) {
Chunk c = chunks.get(i);
if ( c instanceof TextChunk ) {
TextChunk tc = (TextChunk)c;
String unescaped = tc.getText().replace(escape, "");
if (unescaped.length() < tc.getText().length()) {
chunks.set(i, new TextChunk(unescaped));
}
}
}
return chunks;
}
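// Illustrative sketch of the chunking/tokenizing pipeline with the default
// delimiters (assumes this matcher wraps a grammar that defines ID and expr):
//
//   List<Chunk> chunks = matcher.split("<ID> = <expr>;");
//   // -> TagChunk(ID), TextChunk(" = "), TagChunk(expr), TextChunk(";")
//   List<? extends Token> toks = matcher.tokenize("<ID> = <expr>;");
//   // -> TokenTagToken(ID), '=' token, RuleTagToken(expr), ';' token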
}