org.antlr.v4.runtime.tree.xpath.XPath Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of antlr4-runtime Show documentation
The ANTLR 4 Runtime
There is a newer version: 4.13.2
/*
 * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

package org.antlr.v4.runtime.tree.xpath;

import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.LexerNoViableAltException;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ParseTree;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;

/**
 * Represent a subset of XPath XML path syntax for use in identifying nodes in
 * parse trees.
 *
 * 
 * Split path into words and separators {@code /} and {@code //} via ANTLR
 * itself then walk path elements from left to right. At each separator-word
 * pair, find set of nodes. Next stage uses those as work list.
 *
 * 
 * The basic interface is
 * {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
 * But that is just shorthand for:
 *
 *  * {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
 * return p.{@link #evaluate evaluate}(tree);
 * 
 *
 * 
 * See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
 * allows operators:
 *
 * 
 * /
 root
 * //
 anywhere
 * !
 invert; this must appear directly after root or anywhere
 * operator
 * 
 *
 * 
 * and path elements:
 *
 * 
 * ID
 token name
 * 'string'
 any string literal token from the grammar
 * expr
 rule name
 * *
 wildcard matching any node
 * 
 *
 * 
 * Whitespace is not allowed.
 */
public class XPath {
	public static final String WILDCARD = "*"; // word not operator/separator
	public static final String NOT = "!"; 	   // word for invert operator

	protected String path;
	protected XPathElement[] elements;
	protected Parser parser;

	public XPath(Parser parser, String path) {
		this.parser = parser;
		this.path = path;
		elements = split(path);
//		System.out.println(Arrays.toString(elements));
	}

	// TODO: check for invalid token/rule names, bad syntax

	public XPathElement[] split(String path) {
		ANTLRInputStream in;
		try {
			in = new ANTLRInputStream(new StringReader(path));
		}
		catch (IOException ioe) {
			throw new IllegalArgumentException("Could not read path: "+path, ioe);
		}
		XPathLexer lexer = new XPathLexer(in) {
			@Override
			public void recover(LexerNoViableAltException e) { throw e;	}
		};
		lexer.removeErrorListeners();
		lexer.addErrorListener(new XPathLexerErrorListener());
		CommonTokenStream tokenStream = new CommonTokenStream(lexer);
		try {
			tokenStream.fill();
		}
		catch (LexerNoViableAltException e) {
			int pos = lexer.getCharPositionInLine();
			String msg = "Invalid tokens or characters at index "+pos+" in path '"+path+"'";
			throw new IllegalArgumentException(msg, e);
		}

		List tokens = tokenStream.getTokens();
//		System.out.println("path="+path+"=>"+tokens);
		List elements = new ArrayList();
		int n = tokens.size();
		int i=0;
loop:
		while ( i findAll(ParseTree tree, String xpath, Parser parser) {
		XPath p = new XPath(parser, xpath);
		return p.evaluate(tree);
	}

	/**
	 * Return a list of all nodes starting at {@code t} as root that satisfy the
	 * path. The root {@code /} is relative to the node passed to
	 * {@link #evaluate}.
	 */
	public Collection evaluate(final ParseTree t) {
		ParserRuleContext dummyRoot = new ParserRuleContext();
		dummyRoot.children = Collections.singletonList(t); // don't set t's parent.

		Collection work = Collections.singleton(dummyRoot);

		int i = 0;
		while ( i < elements.length ) {
			Collection next = new LinkedHashSet();
			for (ParseTree node : work) {
				if ( node.getChildCount()>0 ) {
					// only try to match next element if it has children
					// e.g., //func/*/stat might have a token node for which
					// we can't go looking for stat nodes.
					Collection matching = elements[i].evaluate(node);
					next.addAll(matching);
				}
			}
			i++;
			work = next;
		}

		return work;
	}
}