org.antlr.v4.runtime.tree.xpath.XPath Maven / Gradle / Ivy
The newest version!
/*
* Copyright (c) 2012 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD-3-Clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.LexerNoViableAltException;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ParseTree;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
/**
* Represent a subset of XPath XML path syntax for use in identifying nodes in
* parse trees.
*
*
* Split path into words and separators {@code /} and {@code //} via ANTLR
* itself then walk path elements from left to right. At each separator-word
* pair, find set of nodes. Next stage uses those as work list.
*
*
* The basic interface is
* {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
* But that is just shorthand for:
*
*
* <pre>
* {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
* return p.{@link #evaluate evaluate}(tree);
* </pre>
*
*
*
* See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
* allows operators:
*
*
* <dl>
* <dt>{@code /}</dt> <dd>root</dd>
* <dt>{@code //}</dt> <dd>anywhere</dd>
* <dt>{@code !}</dt> <dd>invert; this must appear directly after root or anywhere
* operator</dd>
* </dl>
*
*
*
* and path elements:
*
*
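* <dl>
* <dt>{@code ID}</dt> <dd>token name</dd>
* <dt>{@code 'string'}</dt> <dd>any string literal token from the grammar</dd>
* <dt>{@code expr}</dt> <dd>rule name</dd>
* <dt>{@code *}</dt> <dd>wildcard matching any node</dd>
* </dl>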
*
*
*
* Whitespace is not allowed.
*/
public class XPath {
/** Path word {@code *}, the wildcard matching any node; it is a word, not an operator/separator. */
public static final String WILDCARD = "*"; // word not operator/separator
/** Path word {@code !} used for the invert operator. */
public static final String NOT = "!"; // word for invert operator
/** The path string handed to the constructor. */
protected String path;
/** The result of calling {@code split(path)} in the constructor. */
protected XPathElement[] elements;
/** The parser handed to the constructor. */
protected Parser parser;
/**
 * Create an XPath for {@code path}, splitting it into path elements
 * immediately so that a bad path is reported at construction time.
 *
 * @param parser the parser to associate with this path; it is stored
 *               before {@code split} runs so that {@code split} can use it
 * @param path   the path expression to split and later evaluate
 */
public XPath(Parser parser, String path) {
	this.path = path;
	this.parser = parser;
	// Both fields above must be assigned before split() is invoked.
	this.elements = split(path);
}
// TODO: check for invalid token/rule names, bad syntax
/**
 * Tokenize {@code path} with an {@link XPathLexer} and turn the tokens into
 * path elements.
 *
 * NOTE(review): only the tokenizing half of this method is visible here; the
 * token-to-element loop has been lost (see the note further down).
 *
 * @param path the path expression to split
 * @return the path elements, per the declared return type; the statement
 *         that builds the array is not visible in this copy
 * @throws IllegalArgumentException if the lexer raises a
 *         {@link LexerNoViableAltException} while tokenizing {@code path}
 */
public XPathElement[] split(String path) {
// Anonymous subclass: rethrow instead of recovering, so the first bad
// character aborts lexing and reaches the catch block below.
XPathLexer lexer = new XPathLexer(CharStreams.fromString(path)) {
@Override
public void recover(LexerNoViableAltException e) { throw e; }
};
// Replace whatever listeners the lexer starts with by the XPath-specific one.
lexer.removeErrorListeners();
lexer.addErrorListener(new XPathLexerErrorListener());
CommonTokenStream tokenStream = new CommonTokenStream(lexer);
try {
// Lex the path up front so lexer errors surface here.
tokenStream.fill();
}
catch (LexerNoViableAltException e) {
// Report the lexer's current position and keep the lexer exception as the cause.
int pos = lexer.getCharPositionInLine();
String msg = "Invalid tokens or characters at index "+pos+" in path '"+path+"'";
throw new IllegalArgumentException(msg, e);
}
// NOTE(review): the raw List types below presumably carried type arguments
// that were stripped along with other "<...>" text; confirm against upstream.
List tokens = tokenStream.getTokens();
// System.out.println("path="+path+"=>"+tokens);
List elements = new ArrayList();
int n = tokens.size();
int i=0;
loop:
// NOTE(review): the next line is malformed. Everything between "while ( i"
// and "findAll(" appears to be missing: the loop condition and body, the end
// of split(), and the modifiers/return type of findAll. Restore this region
// from the upstream file; as written this does not compile.
while ( i findAll(ParseTree tree, String xpath, Parser parser) {
// Visible body of findAll: build an XPath for (parser, xpath) and evaluate
// it against tree.
XPath p = new XPath(parser, xpath);
return p.evaluate(tree);
}
/**
* Return a list of all nodes starting at {@code t} as root that satisfy the
* path. The root {@code /} is relative to the node passed to
* {@link #evaluate}.
*/
public Collection evaluate(final ParseTree t) {
ParserRuleContext dummyRoot = new ParserRuleContext();
dummyRoot.children = Collections.singletonList(t); // don't set t's parent.
Collection work = Collections.singleton(dummyRoot);
int i = 0;
while ( i < elements.length ) {
Collection next = new LinkedHashSet();
for (ParseTree node : work) {
if ( node.getChildCount()>0 ) {
// only try to match next element if it has children
// e.g., //func/*/stat might have a token node for which
// we can't go looking for stat nodes.
Collection extends ParseTree> matching = elements[i].evaluate(node);
next.addAll(matching);
}
}
i++;
work = next;
}
return work;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy