// net.sourceforge.pmd.lang.xml.rule.DomXPathRule (Maven / Gradle / Ivy)
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.xml.rule;
import java.util.Objects;
import net.sourceforge.pmd.lang.LanguageProcessor;
import net.sourceforge.pmd.lang.ast.Node;
import net.sourceforge.pmd.lang.rule.AbstractRule;
import net.sourceforge.pmd.lang.rule.xpath.XPathRule;
import net.sourceforge.pmd.lang.xml.ast.internal.XmlParserImpl.RootXmlNode;
import net.sourceforge.pmd.properties.PropertyDescriptor;
import net.sourceforge.pmd.properties.PropertyFactory;
import net.sourceforge.pmd.reporting.RuleContext;
/**
* XPath rule that executes an expression on the DOM directly, and not
* on the PMD AST wrapper. The XPath expressions adhere to the XPath
* (2.0) spec, so they can be tested in any existing XPath testing tools
* instead of just the PMD designer (google "xpath test"). Usage of this
* class is strongly recommended over the standard {@link XPathRule}, which
* is mostly useful in other PMD languages.
*
* <h3>Differences with {@link XPathRule}</h3>
*
* This rule and {@link XPathRule} do not accept exactly the same queries,
* because {@link XPathRule} implements the XPath spec in an ad-hoc way.
* The main differences are:
*
* <ul>
* <li>{@link XPathRule} uses elements to represent text nodes.
* This is contrary to the XPath spec, in which element and text nodes
* are different kinds of nodes. To replace the query {@code //elt/text[@Text="abc"]},
* use the XPath function {@code text()}, e.g. {@code //elt[text()="abc"]}.
* </li>
* <li>{@link XPathRule} adds additional attributes to each element
* (e.g. {@code @BeginLine} and {@code @Text}). These attributes are not
* XML attributes, so they are not accessible using {@code DomXPathRule}.
* Instead, use the XPath functions {@code pmd:startLine(node)}, {@code pmd:endLine(node)} and related.
* For instance, replace {@code //elt[@EndLine - @BeginLine > 10]} with
* {@code //elt[pmd:endLine(.) - pmd:startLine(.) > 10]}.
* </li>
* <li>{@link XPathRule} uses an element called {@code "document"} as the
* root node of every XML AST. This node does not have the correct node kind,
* as it's an element, not a document. To replace {@code /document/RootNode},
* use just {@code /RootNode}.
* </li>
* <li>{@link XPathRule} ignores comments and processing instructions
* (e.g. FXML's {@code <?import ... ?>}).
* This rule makes them accessible with the regular XPath syntax.
* The following finds all comments in the file:
* <pre>{@code
* //comment()
* }</pre>
* The following finds only top-level comments starting with "prefix":
* <pre>{@code
* /comment()[fn:starts-with(fn:string(.), "prefix")]
* }</pre>
* Note the use of {@code fn:string}.
*
* <p>As an example of matching processing instructions, the following
* fetches all {@code <?import ... ?>} processing instructions.
* <pre>{@code
* /processing-instruction('import')
* }</pre>
* The string value of the instruction can be found with {@code fn:string}.
* </li>
* </ul>
*
*
*
* Additionally, this rule only supports XPath 2.0, with no option
* for configuration. This will be bumped to XPath 3.1 in PMD 7.
*
*
* <h3>Namespace-sensitivity</h3>
*
* Another important difference is that this rule is namespace-sensitive.
* If the tested XML documents use a schema ({@code xmlns} attribute on the root),
* you should set the property {@code defaultNsUri} on the rule with
* the value of the {@code xmlns} attribute. Otherwise node tests won't
* match unless you use a wildcard URI prefix ({@code *:nodeName}).
*
*
* <p>For instance, for the document
* <pre>{@code
* <foo xmlns="http://company.com/aschema">
* </foo>
* }</pre>
* the XPath query {@code //foo} will not match anything, while {@code //*:foo}
* will. If you set the property {@code defaultNsUri} to {@code "http://company.com/aschema"},
* then {@code //foo} will be expanded to {@code //Q{http://company.com/aschema}foo},
* and match the {@code foo} node. The behaviour is equivalent in the following
* document:
* <pre>{@code
* <my:foo xmlns:my="http://company.com/aschema">
* </my:foo>
* }</pre>
*
* However, for the document
*
* <pre>{@code
* <foo>
* </foo>
* }</pre>
* the XPath queries {@code //foo} and {@code //*:foo} both match, because
* {@code //foo} is expanded to {@code //Q{}foo} (local name foo, empty URI),
* and the document has no default namespace (= the empty default namespace).
*
* Note that explicitly specifying URIs with {@code Q{...}localName}
* as in this documentation is XPath 3.1 syntax and will only be available
* in PMD 7.
*
* @since PMD 6.44.0
* @author Clément Fournier
*/
public class DomXPathRule extends AbstractRule {

    /** Rule property {@code xpath}: the expression to evaluate; defaults to the empty string. */
    private static final PropertyDescriptor<String> XPATH_EXPR
        = PropertyFactory.stringProperty("xpath")
                         .desc("An XPath 2.0 expression that will be evaluated against the root DOM")
                         .defaultValue("") // no default value
                         .build();

    /** Rule property {@code defaultNsUri}: default namespace URI for node tests; defaults to the empty string. */
    private static final PropertyDescriptor<String> DEFAULT_NS_URI
        = PropertyFactory.stringProperty("defaultNsUri")
                         .desc("A URI for the default namespace of node tests in the XPath expression. "
                               + "This is provided to match documents based on their declared schema.")
                         .defaultValue("")
                         .build();

    /**
     * Query built by {@link #initialize(LanguageProcessor)}; stays {@code null}
     * until that method has been called.
     */
    SaxonDomXPathQuery query;

    /**
     * Creates a rule with both properties registered and left at their
     * default (empty) values.
     */
    public DomXPathRule() {
        definePropertyDescriptor(XPATH_EXPR);
        definePropertyDescriptor(DEFAULT_NS_URI);
    }

    /**
     * Creates a rule for the given expression, with an empty default
     * namespace URI.
     *
     * @param xpath value for the {@code xpath} property
     */
    public DomXPathRule(String xpath) {
        this(xpath, "");
    }

    /**
     * Creates a rule for the given expression and default namespace URI.
     *
     * @param xpath        value for the {@code xpath} property
     * @param defaultNsUri value for the {@code defaultNsUri} property
     */
    public DomXPathRule(String xpath, String defaultNsUri) {
        this();
        setProperty(XPATH_EXPR, xpath);
        setProperty(DEFAULT_NS_URI, defaultNsUri);
    }

    /**
     * Evaluates the query on the given node and reports a violation for
     * every node the query returns.
     *
     * @param node node to check; it is cast to {@link RootXmlNode}
     * @param ctx  context that receives the violations
     * @throws NullPointerException if {@link #initialize(LanguageProcessor)}
     *                              has not been called yet
     */
    @Override
    public void apply(Node node, RuleContext ctx) {
        RootXmlNode root = (RootXmlNode) node;
        for (Node match : getXPathQuery().evaluate(root, this)) {
            ctx.addViolation(match);
        }
    }

    /**
     * Builds the query from the current values of the {@code xpath} and
     * {@code defaultNsUri} properties, this rule's property descriptors and
     * the XPath handler obtained from the language processor's services.
     *
     * @param languageProcessor processor whose services supply the XPath handler
     */
    @Override
    public void initialize(LanguageProcessor languageProcessor) {
        query = new SaxonDomXPathQuery(getProperty(XPATH_EXPR),
                                       getProperty(DEFAULT_NS_URI),
                                       getPropertyDescriptors(),
                                       languageProcessor.services().getXPathHandler());
    }

    /**
     * Returns the query, failing fast when the rule was never initialized.
     *
     * @return the non-null query
     * @throws NullPointerException with message "rule not initialized" if the
     *                              query is still {@code null}
     */
    private SaxonDomXPathQuery getXPathQuery() {
        return Objects.requireNonNull(query, "rule not initialized");
    }
}