// org.shredzone.commons.xml.XQuery Maven / Gradle / Ivy
// Show all versions of commons-xml Show documentation
/*
* Shredzone Commons
*
* Copyright (C) 2014 Richard "Shred" Körber
* http://commons.shredzone.org
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.shredzone.commons.xml;
import static java.util.stream.Collectors.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Helps to easily read content from XML sources.
 * <p>
 * A main goal of {@link XQuery} is to keep XML reading as simple as possible. For this
 * reason, sophisticated XML features like validation or namespaces are not supported.
 * <p>
 * Performance was not a goal either. If you need to parse large documents, you better
 * use the old-fashioned Java ways.
 * <p>
 * Instances cache some lazily computed state (attribute map, parent, XPath factory)
 * without any synchronization.
 *
 * @author Richard "Shred" Körber
 */
public class XQuery {

    /** SAX feature controlling whether external general entities are included. */
    private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES =
            "http://xml.org/sax/features/external-general-entities";

    /** SAX feature controlling whether external parameter entities are included. */
    private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES =
            "http://xml.org/sax/features/external-parameter-entities";

    /** Xerces feature controlling whether an external DTD is loaded when not validating. */
    private static final String FEATURE_LOAD_EXTERNAL_DTD =
            "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    private final Node node;

    // The following fields are created on first use; null means "not computed yet".
    private XPathFactory xpf;
    private Optional<XQuery> parent;
    private Map<String, String> attrMap;

    /**
     * Private constructor for a {@link Node} element.
     *
     * @param node
     *            DOM node wrapped by this {@link XQuery}
     */
    private XQuery(Node node) {
        this.node = node;
    }

    /**
     * Parses an XML source and returns an {@link XQuery} object representing the root of
     * the document.
     * <p>
     * The parser is set up defensively: secure processing is enabled, and external
     * general entities, external parameter entities and external DTDs are not loaded.
     * This prevents XML External Entity (XXE) attacks when reading untrusted documents.
     *
     * @param in
     *            {@link InputSource} of the XML document
     * @return {@link XQuery} representing the root of the parsed document
     * @throws IOException
     *             if the XML source could not be read or parsed for any reason, or if
     *             the parser could not be configured
     */
    public static XQuery parse(InputSource in) throws IOException {
        try {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
            dbf.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
            dbf.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
            dbf.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);

            DocumentBuilder db = dbf.newDocumentBuilder();
            return new XQuery(db.parse(in));
        } catch (ParserConfigurationException | SAXException ex) {
            throw new IOException("Could not parse XML", ex);
        }
    }

    /**
     * Parses an XML source and returns an {@link XQuery} object representing the root of
     * the document.
     *
     * @param in
     *            {@link InputStream} of the XML document
     * @return {@link XQuery} representing the root of the parsed document
     * @throws IOException
     *             if the XML source could not be read or parsed for any reason
     */
    public static XQuery parse(InputStream in) throws IOException {
        return parse(new InputSource(in));
    }

    /**
     * Parses an XML source and returns an {@link XQuery} object representing the root of
     * the document.
     *
     * @param r
     *            {@link Reader} providing the XML document
     * @return {@link XQuery} representing the root of the parsed document
     * @throws IOException
     *             if the XML source could not be read or parsed for any reason
     */
    public static XQuery parse(Reader r) throws IOException {
        return parse(new InputSource(r));
    }

    /**
     * Parses an XML source and returns an {@link XQuery} object representing the root of
     * the document.
     *
     * @param xml
     *            String containing the XML document
     * @return {@link XQuery} representing the root of the parsed document
     * @throws IOException
     *             if the XML source could not be read or parsed for any reason
     */
    public static XQuery parse(String xml) throws IOException {
        return parse(new StringReader(xml));
    }

    /**
     * Streams all children of this element. Children elements are represented by
     * {@link XQuery} objects as well. Child nodes that are not {@link Element}s (for
     * example text nodes) are skipped.
     *
     * @return {@link Stream} of children
     */
    public Stream<XQuery> stream() {
        return new NodeListSpliterator(node.getChildNodes()).stream()
                .filter(it -> it instanceof Element)
                .map(XQuery::new);
    }

    /**
     * Returns the next sibling of this element.
     *
     * @return Next sibling element, or empty if there is none
     * @since 1.1
     */
    public Optional<XQuery> nextSibling() {
        return findElement(Node::getNextSibling);
    }

    /**
     * Returns the previous sibling of this element.
     *
     * @return Previous sibling element, or empty if there is none
     * @since 1.1
     */
    public Optional<XQuery> previousSibling() {
        return findElement(Node::getPreviousSibling);
    }

    /**
     * Selects elements based on the XPath expression that is applied to the tree
     * represented by this {@link XQuery}.
     *
     * @param xpath
     *            XPath expression
     * @return Stream of selected nodes as {@link XQuery} object
     * @throws IllegalArgumentException
     *             if the XPath expression was invalid
     */
    public Stream<XQuery> select(String xpath) {
        return new NodeListSpliterator(evaluate(xpath)).stream().map(XQuery::new);
    }

    /**
     * Gets a single element based on the XPath expression that is applied to the tree
     * represented by this {@link XQuery}. Exactly one element is expected to match the
     * XPath expression, otherwise an exception is thrown.
     *
     * @param xpath
     *            XPath expression
     * @return Selected node
     * @throws IllegalArgumentException
     *             if the XPath expression was invalid, or if it did not match exactly
     *             one element
     * @since 1.1
     */
    public XQuery get(String xpath) {
        NodeList nl = evaluate(xpath);
        int matches = nl.getLength();
        if (matches == 1) {
            return new XQuery(nl.item(0));
        }
        if (matches == 0) {
            throw new IllegalArgumentException("XPath '" + xpath
                    + "' does not match any elements");
        }
        throw new IllegalArgumentException("XPath '" + xpath + "' matches "
                + matches + " elements");
    }

    /**
     * Checks if there is at least one element matching the XPath expression.
     *
     * @param xpath
     *            XPath expression
     * @return {@code true} if there is at least one element, {@code false} if there is
     *         none.
     * @throws IllegalArgumentException
     *             if the XPath expression was invalid
     * @since 1.1
     */
    public boolean exists(String xpath) {
        return select(xpath).findAny().isPresent();
    }

    /**
     * Selects values based on the XPath expression that is applied to the tree
     * represented by this {@link XQuery}.
     *
     * @param xpath
     *            XPath expression
     * @return Stream of strings containing the node values
     * @throws IllegalArgumentException
     *             if the XPath expression was invalid
     */
    public Stream<String> value(String xpath) {
        return select(xpath).map(XQuery::text);
    }

    /**
     * Selects values based on the XPath expression that is applied to the tree
     * represented by this {@link XQuery}. In contrast to {@link #value(String)}, this
     * method reads the element texts recursively, using {@link #allText()}.
     *
     * @param xpath
     *            XPath expression
     * @return Stream of strings containing the node values
     * @throws IllegalArgumentException
     *             if the XPath expression was invalid
     */
    public Stream<String> allValue(String xpath) {
        return select(xpath).map(XQuery::allText);
    }

    /**
     * Returns the text selected by the XPath expression. If more than one node is
     * selected, their texts are concatenated without a separator.
     *
     * @param xpath
     *            XPath expression
     * @return Text selected by the expression
     * @throws IllegalArgumentException
     *             if the XPath expression was invalid
     */
    public String text(String xpath) {
        return value(xpath).collect(joining());
    }

    /**
     * @return this {@link XQuery} node's tag name.
     */
    public String name() {
        return node.getNodeName();
    }

    /**
     * @return this {@link XQuery} node's text content, non recursively.
     */
    public String text() {
        // Only direct Text children contribute; text of nested elements is ignored.
        return new NodeListSpliterator(node.getChildNodes()).stream()
                .filter(it -> it instanceof Text)
                .map(it -> ((Text) it).getNodeValue())
                .collect(joining());
    }

    /**
     * @return this {@link XQuery} node's text content, recursively.
     */
    public String allText() {
        return node.getTextContent();
    }

    /**
     * Returns the attributes of this node. The map is built on the first invocation and
     * cached afterwards.
     *
     * @return an unmodifiable map of this node's attributes, mapping attribute names to
     *         their values. It is empty if the node type cannot have attributes.
     */
    public Map<String, String> attr() {
        if (attrMap == null) {
            NamedNodeMap nnm = node.getAttributes();
            if (nnm == null) {
                attrMap = Collections.emptyMap();
            } else {
                attrMap = Collections.unmodifiableMap(
                        IntStream.range(0, nnm.getLength())
                                .mapToObj(nnm::item)
                                .collect(toMap(Node::getNodeName, Node::getNodeValue)));
            }
        }
        return attrMap;
    }

    /**
     * Returns the parent node of this node, as {@link XQuery} object. A root node
     * returns an empty optional instead. The result is computed on the first invocation
     * and cached afterwards.
     *
     * @return parent node
     */
    public Optional<XQuery> parent() {
        if (parent == null) {
            parent = Optional.ofNullable(node.getParentNode()).map(XQuery::new);
        }
        return parent;
    }

    /**
     * Checks if this is a root node.
     *
     * @return {@code true} if this is a root node, {@code false} if there's a parent.
     * @since 1.1
     */
    public boolean isRoot() {
        return node.getParentNode() == null;
    }

    /**
     * Returns the root node of this node, as {@link XQuery} object. A root node returns
     * itself.
     *
     * @return root node
     * @since 1.1
     */
    public XQuery root() {
        return isRoot() ? this : new XQuery(node.getOwnerDocument());
    }

    /**
     * Evaluates the XPath expression and returns a list of nodes.
     *
     * @param xpath
     *            XPath expression
     * @return {@link NodeList} matching the expression
     * @throws IllegalArgumentException
     *             if the XPath expression was invalid
     */
    private NodeList evaluate(String xpath) {
        // Looking up an XPathFactory is comparatively costly, and most XQuery instances
        // (e.g. those created while streaming children) never evaluate an XPath, so the
        // factory is only created when it is actually needed.
        if (xpf == null) {
            xpf = XPathFactory.newInstance();
        }

        try {
            XPathExpression expr = xpf.newXPath().compile(xpath);
            return (NodeList) expr.evaluate(node, XPathConstants.NODESET);
        } catch (XPathExpressionException ex) {
            throw new IllegalArgumentException("Invalid XPath '" + xpath + "'", ex);
        }
    }

    /**
     * Finds an Element node by applying the iterator function until another Element was
     * found.
     *
     * @param iterator
     *            Iterator to apply. It receives the current node and returns the next
     *            node to look at, or {@code null} if there are no more nodes.
     * @return node that was found, or empty if the iterator ran out of nodes
     */
    private Optional<XQuery> findElement(Function<Node, Node> iterator) {
        Node it = node;
        do {
            it = iterator.apply(it);
        } while (it != null && !(it instanceof Element));
        return Optional.ofNullable(it).map(XQuery::new);
    }

}