All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metreeca.xml.actions.XPath Maven / Gradle / Ivy

The newest version!
/*
 * Copyright © 2013-2022 Metreeca srl
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.metreeca.xml.actions;

import com.metreeca.rest.Xtream;

import org.w3c.dom.*;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.StreamSupport;

import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.xpath.*;

import static java.util.regex.Pattern.CASE_INSENSITIVE;

import static javax.xml.XMLConstants.XMLNS_ATTRIBUTE;

/**
 * XPath-based XML node processing.
 *
 * 

Maps XML nodes to values produced by a function taking as argument a node-targeted XPath processor.

* * @param the type of the value returned by the processing action */ public final class XPath implements Function { /** * The prefix mapped to the default namespace of the target document ({@value}). */ public static final String DefaultPrefix="_"; private static final String HTMLPrefix="html"; private static final String HTMLUri="http://www.w3.org/1999/xhtml"; private static final Pattern EntityPattern=Pattern.compile("&#(?x?)(?\\d+);", CASE_INSENSITIVE); private static final XPathFactory factory=XPathFactory.newInstance(); /** * Decodes XML numeric entities. * * @param text the text to be decoded * * @return a version of {@code text} where XML numeric entities (for instance {@code ’} or {@code “}) * are replaced with the corresponding Unicode characters * * @throws NullPointerException if {@code text} is null */ public static String decode(final CharSequence text) { if ( text == null ) { throw new NullPointerException("null text"); } final StringBuffer buffer=new StringBuffer(text.length()); final java.util.regex.Matcher matcher=EntityPattern.matcher(text); while ( matcher.find() ) { matcher.appendReplacement(buffer, ""); buffer.append(Character.toChars( Integer.parseInt(matcher.group("code"), matcher.group("hex").isEmpty() ? 10 : 16) )); } matcher.appendTail(buffer); return buffer.toString(); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// private final Function query; /** * Creates an XPath-based node processing action. * * @param query a function taking as argument a processor and returning a value * * @throws NullPointerException if {@code query} is null */ public XPath(final Function query) { if ( query == null ) { throw new NullPointerException("null query"); } this.query=query; } @Override public R apply(final Node node) { return query.apply(new Processor(node)); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * XPath processor. * *

Applies XPath expression to a target XML node.

*/ public static final class Processor { private final Node node; private final URI base; private final javax.xml.xpath.XPath xpath; /** * Creates an XPath processor. * * @param node the target XML node for the processor * * @throws NullPointerException if {@code node} is null */ public Processor(final Node node) { if ( node == null ) { throw new NullPointerException("null node"); } this.xpath=factory.newXPath(); final Map namespaces=new HashMap<>(); final Node root=node instanceof Document ? ((Document)node).getDocumentElement() : node; for (Node scope=root; scope != null; scope=scope.getParentNode()) { final NamedNodeMap attributes=scope.getAttributes(); if ( attributes != null ) { for (int i=0, n=attributes.getLength(); i < n; ++i) { final Node attribute=attributes.item(i); if ( XMLNS_ATTRIBUTE.equals(attribute.getNodeName()) ) { // default namespace namespaces.putIfAbsent(DefaultPrefix, attribute.getNodeValue()); } else if ( XMLNS_ATTRIBUTE.equals(attribute.getPrefix()) ) { // prefixed namespace namespaces.putIfAbsent(attribute.getLocalName(), attribute.getNodeValue()); } } } } final String namespace=root.getNamespaceURI(); namespaces.computeIfAbsent(DefaultPrefix, prefix -> namespace); namespaces.computeIfAbsent(HTMLPrefix, prefix -> HTMLUri.equals(namespace) ? namespace : null); xpath.setNamespaceContext(new NamespaceContext() { @Override public String getNamespaceURI(final String prefix) { return namespaces.get(prefix); } @Override public String getPrefix(final String namespaceURI) { throw new UnsupportedOperationException("prefix lookup"); } @Override public Iterator getPrefixes(final String namespaceURI) { throw new UnsupportedOperationException("prefixes lookup"); } }); this.node=node; this.base=Optional .ofNullable(Optional.of(root) .filter(r -> HTMLUri.equals(r.getNamespaceURI())) .map(r -> { try { return (String)xpath .compile("/html:html/html:head/html:base/@href") .evaluate(r, XPathConstants.STRING); } catch ( final XPathExpressionException e ) { return null; } }) .filter(href -> !href.isEmpty()) .orElse(node.getBaseURI()) ) .map(url -> { try { return new URI(url); } catch ( final URISyntaxException e ) { return null; } }) .orElse(null); } /** * Retrieves the target node. * * @return the target node of this processor. */ public Node node() { return node; } /** * Retrieves the target document. * * @return the target document of this processor. */ public Document document() { return node instanceof Document ? (Document)node : node.getOwnerDocument(); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * Retrieves a boolean value from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return an optional boolean containing the value produced by evaluating {@code xpath} against the target node * * @throws NullPointerException if {@code xpath} is null */ public Optional bool(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return Optional.of((Boolean)evaluate(xpath, XPathConstants.BOOLEAN)); } /** * Retrieves a numeric value from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return an optional number containing the value produced by evaluating {@code xpath} against the target node, * if one was available; an empty optional, otherwise * * @throws NullPointerException if {@code xpath} is null */ public Optional number(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return Optional.of((Double)evaluate(xpath, XPathConstants.NUMBER)).filter(x -> !Double.isNaN(x)); } /** * Retrieves a textual value from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return an optional non-empty string containing the value produced by evaluating {@code xpath} against the * target node, if one was available and not empty; an empty optional, otherwise * * @throws NullPointerException if {@code xpath} is null */ public Optional string(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return Optional.of((String)evaluate(xpath, XPathConstants.STRING)).filter(s -> !s.isEmpty()); } /** * Retrieves textual values from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return a stream of non-empty strings containing the values produced by evaluating {@code xpath} against the * target node * * @throws NullPointerException if {@code xpath} is null */ public Xtream strings(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return nodes(xpath).map(Node::getTextContent).filter(s -> !s.isEmpty()); } /** * Retrieves a URI value from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return an optional string URIs containing the values produced by evaluating {@code xpath} against the target * node, if one was available, and resolving it against the {@linkplain Node#getBaseURI() base URI} of the * target node, if available; syntactically malformed URIs are returned verbatim * * @throws NullPointerException if {@code xpath} is null */ public Optional link(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return node(xpath).map(Node::getTextContent).map(s -> { try { return base == null ? s : base.resolve(s).normalize().toString(); } catch ( final IllegalArgumentException e ) { return s; } }); } /** * Retrieves URI values from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return a stream of URIs containing the values produced by evaluating {@code xpath} against the target node * and resolving them against the {@linkplain Node#getBaseURI() base URI} of the target node, if available; * syntactically malformed URIs are returned verbatim * * @throws NullPointerException if {@code xpath} is null */ public Xtream links(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return nodes(xpath).map(Node::getTextContent).map(s -> { try { return base == null ? s : base.resolve(s).normalize().toString(); } catch ( final IllegalArgumentException e ) { return s; } }); } /** * Retrieves an element value from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return an optional element containing the value produced by evaluating {@code xpath} against the target * node, * if one was available; an empty optional, otherwise * * @throws NullPointerException if {@code xpath} is null */ public Optional element(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return node(xpath) .filter(Element.class::isInstance) .map(Element.class::cast); } /** * Retrieves element values from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return a stream of elements containing the values produced by evaluating {@code xpath} against the target * node * * @throws NullPointerException if {@code xpath} is null */ public Xtream elements(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return nodes(xpath) .filter(Element.class::isInstance) .map(Element.class::cast); } /** * Retrieves a node value from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return an optional node containing the value produced by evaluating {@code xpath} against the target node, * if one was available; an empty optional, otherwise * * @throws NullPointerException if {@code xpath} is null */ public Optional node(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return Optional.ofNullable((Node)evaluate(xpath, XPathConstants.NODE)); } /** * Retrieves node values from the target node. * * @param xpath the XPath expression to be evaluated against the target node * * @return a stream of nodes containing the values produced by evaluating {@code xpath} against the target node * * @throws NullPointerException if {@code xpath} is null */ public Xtream nodes(final String xpath) { if ( xpath == null ) { throw new NullPointerException("null XPath expression"); } return Xtream.from(StreamSupport.stream(Spliterators.spliteratorUnknownSize(new Iterator() { private final NodeList nodes=(NodeList)evaluate(xpath, XPathConstants.NODESET); private int next; @Override public boolean hasNext() { return next < nodes.getLength(); } @Override public Node next() throws NoSuchElementException { if ( !hasNext() ) { throw new NoSuchElementException("no more iterator elements"); } return nodes.item(next++); } }, Spliterator.ORDERED), false)); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// private Object evaluate(final String query, final QName type) { try { return xpath.compile(query).evaluate(node, type); } catch ( final XPathExpressionException e ) { throw new RuntimeException(String.format("unable to evaluate XPath expression {%s}", query), e); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy