All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.antlr.v4.runtime.tree.pattern.ParseTreePatternMatcher Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2012 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD-3-Clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

package org.antlr.v4.runtime.tree.pattern;

import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.ListTokenSource;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserInterpreter;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.misc.MultiMap;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.RuleNode;
import org.antlr.v4.runtime.tree.TerminalNode;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * A tree pattern matching mechanism for ANTLR {@link ParseTree}s.
 *
 * 

Patterns are strings of source input text with special tags representing * token or rule references such as:

* *

{@code = ;}

* *

Given a pattern start rule such as {@code statement}, this object constructs * a {@link ParseTree} with placeholders for the {@code ID} and {@code expr} * subtree. Then the {@link #match} routines can compare an actual * {@link ParseTree} from a parse with this pattern. Tag {@code } matches * any {@code ID} token and tag {@code } references the result of the * {@code expr} rule (generally an instance of {@code ExprContext}.

* *

Pattern {@code x = 0;} is a similar pattern that matches the same pattern * except that it requires the identifier to be {@code x} and the expression to * be {@code 0}.

* *

The {@link #matches} routines return {@code true} or {@code false} based * upon a match for the tree rooted at the parameter sent in. The * {@link #match} routines return a {@link ParseTreeMatch} object that * contains the parse tree, the parse tree pattern, and a map from tag name to * matched nodes (more below). A subtree that fails to match, returns with * {@link ParseTreeMatch#mismatchedNode} set to the first tree node that did not * match.

* *

For efficiency, you can compile a tree pattern in string form to a * {@link ParseTreePattern} object.

* *

See {@code TestParseTreeMatcher} for lots of examples. * {@link ParseTreePattern} has two static helper methods: * {@link ParseTreePattern#findAll} and {@link ParseTreePattern#match} that * are easy to use but not super efficient because they create new * {@link ParseTreePatternMatcher} objects each time and have to compile the * pattern in string form before using it.

* *

The lexer and parser that you pass into the {@link ParseTreePatternMatcher} * constructor are used to parse the pattern in string form. The lexer converts * the {@code = ;} into a sequence of four tokens (assuming lexer * throws out whitespace or puts it on a hidden channel). Be aware that the * input stream is reset for the lexer (but not the parser; a * {@link ParserInterpreter} is created to parse the input.). Any user-defined * fields you have put into the lexer might get changed when this mechanism asks * it to scan the pattern string.

* *

Normally a parser does not accept token {@code } as a valid * {@code expr} but, from the parser passed in, we create a special version of * the underlying grammar representation (an {@link ATN}) that allows imaginary * tokens representing rules ({@code }) to match entire rules. We call * these bypass alternatives.

* *

Delimiters are {@code <} and {@code >}, with {@code \} as the escape string * by default, but you can set them to whatever you want using * {@link #setDelimiters}. You must escape both start and stop strings * {@code \<} and {@code \>}.

*/ public class ParseTreePatternMatcher { public static class CannotInvokeStartRule extends RuntimeException { public CannotInvokeStartRule(Throwable e) { super(e); } } // Fixes https://github.com/antlr/antlr4/issues/413 // "Tree pattern compilation doesn't check for a complete parse" public static class StartRuleDoesNotConsumeFullPattern extends RuntimeException { } /** * This is the backing field for {@link #getLexer()}. */ private final Lexer lexer; /** * This is the backing field for {@link #getParser()}. */ private final Parser parser; protected String start = "<"; protected String stop = ">"; protected String escape = "\\"; // e.g., \< and \> must escape BOTH! /** * Constructs a {@link ParseTreePatternMatcher} or from a {@link Lexer} and * {@link Parser} object. The lexer input stream is altered for tokenizing * the tree patterns. The parser is used as a convenient mechanism to get * the grammar name, plus token, rule names. */ public ParseTreePatternMatcher(Lexer lexer, Parser parser) { this.lexer = lexer; this.parser = parser; } /** * Set the delimiters used for marking rule and token tags within concrete * syntax used by the tree pattern parser. * * @param start The start delimiter. * @param stop The stop delimiter. * @param escapeLeft The escape sequence to use for escaping a start or stop delimiter. * * @exception IllegalArgumentException if {@code start} is {@code null} or empty. * @exception IllegalArgumentException if {@code stop} is {@code null} or empty. */ public void setDelimiters(String start, String stop, String escapeLeft) { if (start == null || start.isEmpty()) { throw new IllegalArgumentException("start cannot be null or empty"); } if (stop == null || stop.isEmpty()) { throw new IllegalArgumentException("stop cannot be null or empty"); } this.start = start; this.stop = stop; this.escape = escapeLeft; } /** Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}? */ public boolean matches(ParseTree tree, String pattern, int patternRuleIndex) { ParseTreePattern p = compile(pattern, patternRuleIndex); return matches(tree, p); } /** Does {@code pattern} matched as rule patternRuleIndex match tree? Pass in a * compiled pattern instead of a string representation of a tree pattern. */ public boolean matches(ParseTree tree, ParseTreePattern pattern) { MultiMap labels = new MultiMap(); ParseTree mismatchedNode = matchImpl(tree, pattern.getPatternTree(), labels); return mismatchedNode == null; } /** * Compare {@code pattern} matched as rule {@code patternRuleIndex} against * {@code tree} and return a {@link ParseTreeMatch} object that contains the * matched elements, or the node at which the match failed. */ public ParseTreeMatch match(ParseTree tree, String pattern, int patternRuleIndex) { ParseTreePattern p = compile(pattern, patternRuleIndex); return match(tree, p); } /** * Compare {@code pattern} matched against {@code tree} and return a * {@link ParseTreeMatch} object that contains the matched elements, or the * node at which the match failed. Pass in a compiled pattern instead of a * string representation of a tree pattern. */ @NotNull public ParseTreeMatch match(@NotNull ParseTree tree, @NotNull ParseTreePattern pattern) { MultiMap labels = new MultiMap(); ParseTree mismatchedNode = matchImpl(tree, pattern.getPatternTree(), labels); return new ParseTreeMatch(tree, pattern, labels, mismatchedNode); } /** * For repeated use of a tree pattern, compile it to a * {@link ParseTreePattern} using this method. */ public ParseTreePattern compile(String pattern, int patternRuleIndex) { List tokenList = tokenize(pattern); ListTokenSource tokenSrc = new ListTokenSource(tokenList); CommonTokenStream tokens = new CommonTokenStream(tokenSrc); ParserInterpreter parserInterp = new ParserInterpreter(parser.getGrammarFileName(), parser.getVocabulary(), Arrays.asList(parser.getRuleNames()), parser.getATNWithBypassAlts(), tokens); ParseTree tree = null; try { parserInterp.setErrorHandler(new BailErrorStrategy()); tree = parserInterp.parse(patternRuleIndex); // System.out.println("pattern tree = "+tree.toStringTree(parserInterp)); } catch (ParseCancellationException e) { throw (RecognitionException)e.getCause(); } catch (RecognitionException re) { throw re; } catch (Exception e) { throw new CannotInvokeStartRule(e); } // Make sure tree pattern compilation checks for a complete parse if ( tokens.LA(1)!=Token.EOF ) { throw new StartRuleDoesNotConsumeFullPattern(); } return new ParseTreePattern(this, pattern, patternRuleIndex, tree); } /** * Used to convert the tree pattern string into a series of tokens. The * input stream is reset. */ @NotNull public Lexer getLexer() { return lexer; } /** * Used to collect to the grammar file name, token names, rule names for * used to parse the pattern into a parse tree. */ @NotNull public Parser getParser() { return parser; } // ---- SUPPORT CODE ---- /** * Recursively walk {@code tree} against {@code patternTree}, filling * {@code match.}{@link ParseTreeMatch#labels labels}. * * @return the first node encountered in {@code tree} which does not match * a corresponding node in {@code patternTree}, or {@code null} if the match * was successful. The specific node returned depends on the matching * algorithm used by the implementation, and may be overridden. */ @Nullable protected ParseTree matchImpl(@NotNull ParseTree tree, @NotNull ParseTree patternTree, @NotNull MultiMap labels) { if (tree == null) { throw new IllegalArgumentException("tree cannot be null"); } if (patternTree == null) { throw new IllegalArgumentException("patternTree cannot be null"); } // x and , x and y, or x and x; or could be mismatched types if ( tree instanceof TerminalNode && patternTree instanceof TerminalNode ) { TerminalNode t1 = (TerminalNode)tree; TerminalNode t2 = (TerminalNode)patternTree; ParseTree mismatchedNode = null; // both are tokens and they have same type if ( t1.getSymbol().getType() == t2.getSymbol().getType() ) { if ( t2.getSymbol() instanceof TokenTagToken ) { // x and TokenTagToken tokenTagToken = (TokenTagToken)t2.getSymbol(); // track label->list-of-nodes for both token name and label (if any) labels.map(tokenTagToken.getTokenName(), tree); if ( tokenTagToken.getLabel()!=null ) { labels.map(tokenTagToken.getLabel(), tree); } } else if ( t1.getText().equals(t2.getText()) ) { // x and x } else { // x and y if (mismatchedNode == null) { mismatchedNode = t1; } } } else { if (mismatchedNode == null) { mismatchedNode = t1; } } return mismatchedNode; } if ( tree instanceof ParserRuleContext && patternTree instanceof ParserRuleContext ) { ParserRuleContext r1 = (ParserRuleContext)tree; ParserRuleContext r2 = (ParserRuleContext)patternTree; ParseTree mismatchedNode = null; // (expr ...) and RuleTagToken ruleTagToken = getRuleTagToken(r2); if ( ruleTagToken!=null ) { ParseTreeMatch m = null; if ( r1.getRuleContext().getRuleIndex() == r2.getRuleContext().getRuleIndex() ) { // track label->list-of-nodes for both rule name and label (if any) labels.map(ruleTagToken.getRuleName(), tree); if ( ruleTagToken.getLabel()!=null ) { labels.map(ruleTagToken.getLabel(), tree); } } else { if (mismatchedNode == null) { mismatchedNode = r1; } } return mismatchedNode; } // (expr ...) and (expr ...) if ( r1.getChildCount()!=r2.getChildCount() ) { if (mismatchedNode == null) { mismatchedNode = r1; } return mismatchedNode; } int n = r1.getChildCount(); for (int i = 0; i)} subtree? */ protected RuleTagToken getRuleTagToken(ParseTree t) { if ( t instanceof RuleNode ) { RuleNode r = (RuleNode)t; if ( r.getChildCount()==1 && r.getChild(0) instanceof TerminalNode ) { TerminalNode c = (TerminalNode)r.getChild(0); if ( c.getSymbol() instanceof RuleTagToken ) { // System.out.println("rule tag subtree "+t.toStringTree(parser)); return (RuleTagToken)c.getSymbol(); } } } return null; } public List tokenize(String pattern) { // split pattern into chunks: sea (raw input) and islands (, ) List chunks = split(pattern); // create token stream from text and tags List tokens = new ArrayList(); for (Chunk chunk : chunks) { if ( chunk instanceof TagChunk ) { TagChunk tagChunk = (TagChunk)chunk; // add special rule token or conjure up new token from name if ( Character.isUpperCase(tagChunk.getTag().charAt(0)) ) { Integer ttype = parser.getTokenType(tagChunk.getTag()); if ( ttype==Token.INVALID_TYPE ) { throw new IllegalArgumentException("Unknown token "+tagChunk.getTag()+" in pattern: "+pattern); } TokenTagToken t = new TokenTagToken(tagChunk.getTag(), ttype, tagChunk.getLabel()); tokens.add(t); } else if ( Character.isLowerCase(tagChunk.getTag().charAt(0)) ) { int ruleIndex = parser.getRuleIndex(tagChunk.getTag()); if ( ruleIndex==-1 ) { throw new IllegalArgumentException("Unknown rule "+tagChunk.getTag()+" in pattern: "+pattern); } int ruleImaginaryTokenType = parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex]; tokens.add(new RuleTagToken(tagChunk.getTag(), ruleImaginaryTokenType, tagChunk.getLabel())); } else { throw new IllegalArgumentException("invalid tag: "+tagChunk.getTag()+" in pattern: "+pattern); } } else { TextChunk textChunk = (TextChunk)chunk; lexer.setInputStream(CharStreams.fromString(textChunk.getText())); Token t = lexer.nextToken(); while ( t.getType()!=Token.EOF ) { tokens.add(t); t = lexer.nextToken(); } } } // System.out.println("tokens="+tokens); return tokens; } /** Split {@code = ;} into 4 chunks for tokenizing by {@link #tokenize}. */ public List split(String pattern) { int p = 0; int n = pattern.length(); List chunks = new ArrayList(); StringBuilder buf = new StringBuilder(); // find all start and stop indexes first, then collect List starts = new ArrayList(); List stops = new ArrayList(); while ( p stops.size() ) { throw new IllegalArgumentException("unterminated tag in pattern: "+pattern); } if ( starts.size() < stops.size() ) { throw new IllegalArgumentException("missing start tag in pattern: "+pattern); } int ntags = starts.size(); for (int i=0; i=stops.get(i) ) { throw new IllegalArgumentException("tag delimiters out of order in pattern: "+pattern); } } // collect into chunks now if ( ntags==0 ) { String text = pattern.substring(0, n); chunks.add(new TextChunk(text)); } if ( ntags>0 && starts.get(0)>0 ) { // copy text up to first tag into chunks String text = pattern.substring(0, starts.get(0)); chunks.add(new TextChunk(text)); } for (int i=0; i String tag = pattern.substring(starts.get(i) + start.length(), stops.get(i)); String ruleOrToken = tag; String label = null; int colon = tag.indexOf(':'); if ( colon >= 0 ) { label = tag.substring(0,colon); ruleOrToken = tag.substring(colon+1, tag.length()); } chunks.add(new TagChunk(label, ruleOrToken)); if ( i+1 < ntags ) { // copy from end of to start of next String text = pattern.substring(stops.get(i) + stop.length(), starts.get(i + 1)); chunks.add(new TextChunk(text)); } } if ( ntags>0 ) { int afterLastTag = stops.get(ntags - 1) + stop.length(); if ( afterLastTag < n ) { // copy text from end of last tag to end String text = pattern.substring(afterLastTag, n); chunks.add(new TextChunk(text)); } } // strip out the escape sequences from text chunks but not tags for (int i = 0; i < chunks.size(); i++) { Chunk c = chunks.get(i); if ( c instanceof TextChunk ) { TextChunk tc = (TextChunk)c; String unescaped = tc.getText().replace(escape, ""); if (unescaped.length() < tc.getText().length()) { chunks.set(i, new TextChunk(unescaped)); } } } return chunks; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy