
// com.github.loyada.jdollarx.PathParsers Maven / Gradle / Ivy
package com.github.loyada.jdollarx;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
/**
 * Functions to find DOM elements in a W3C document. These functions are also useful to experiment and test with how
 * Paths can be used to extract elements (they are used in many of the unit tests of DollarX).
 * <p>
 * Example use, given a document string {@code docString} that contains two {@code div} elements (with the texts
 * "foo" and "boo") before a {@code span}:
 * <pre>{@code
 *   Path el = div.before(span);
 *   String xpath = el.getXPath().get();
 *   NodeList nodes = findAllByPath(docString, el);
 *   assertThat(nodes.getLength(), is(2));
 *   assertThat(nodes.item(0).getTextContent(), equalTo("foo"));
 * }</pre>
 */
public final class PathParsers {

    /** Standard SAX feature: inclusion of external general entities. */
    private static final String EXTERNAL_GENERAL_ENTITIES =
            "http://xml.org/sax/features/external-general-entities";

    /** Standard SAX feature: inclusion of external parameter entities (and the external DTD subset). */
    private static final String EXTERNAL_PARAMETER_ENTITIES =
            "http://xml.org/sax/features/external-parameter-entities";

    /** Xerces feature (supported by the JDK's built-in parser): loading of the external DTD when not validating. */
    private static final String LOAD_EXTERNAL_DTD =
            "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    private PathParsers() {}

    /**
     * Convert a string to a {@link Document}, assuming UTF-8 encoding.
     * <p>
     * The parser is configured not to resolve external entities or load external DTDs, so that parsing a
     * document string cannot read local files or open network connections (XXE). Documents that merely carry a
     * DOCTYPE declaration, or that use internally declared entities, are still accepted.
     *
     * @param document the document as a string; must not be null
     * @return the document as a {@link Document}
     * @throws ParserConfigurationException if a parser with the required configuration cannot be created
     * @throws IOException if reading the document bytes fails
     * @throws SAXException if the string is not a well-formed document
     */
    public static Document getDocumentFromString(final String document)
            throws ParserConfigurationException, IOException, SAXException {
        final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // Harden against XXE: never fetch anything that lives outside of the supplied string.
        factory.setFeature(EXTERNAL_GENERAL_ENTITIES, false);
        factory.setFeature(EXTERNAL_PARAMETER_ENTITIES, false);
        factory.setFeature(LOAD_EXTERNAL_DTD, false);

        final DocumentBuilder builder = factory.newDocumentBuilder();
        final InputStream input = new ByteArrayInputStream(document.getBytes(StandardCharsets.UTF_8));
        return builder.parse(input);
    }

    /**
     * Find all the nodes that match a path in a W3C document given as a string.
     * The string is first parsed with {@link #getDocumentFromString(String)}.
     *
     * @param docString a W3C document, as a string
     * @param path the path to find; it must expose an xpath
     * @return a node list with the details of all the elements that match the given path
     * @throws XPathExpressionException if the xpath of the path cannot be compiled or evaluated
     * @throws IOException if reading the document bytes fails
     * @throws SAXException if the string is not a well-formed document
     * @throws ParserConfigurationException if a parser with the required configuration cannot be created
     */
    public static NodeList findAllByPath(final String docString, final Path path)
            throws XPathExpressionException, IOException, SAXException, ParserConfigurationException {
        return findAllByPath(getDocumentFromString(docString), path);
    }

    /**
     * Find all the nodes that match a path in a W3C document.
     *
     * @param doc a W3C document
     * @param path the path to find; it must expose an xpath
     * @return a node list with the details of all the elements that match the given path
     * @throws XPathExpressionException if the xpath of the path cannot be compiled or evaluated
     */
    public static NodeList findAllByPath(final Document doc, final Path path) throws XPathExpressionException {
        return findAllByXPath(doc, path.getXPath().get());
    }

    /**
     * Find all the nodes that match a raw xpath in a W3C document. This is the implementation behind the
     * {@code findAllByPath} variants. The given xpath is passed through {@code XpathUtils.insideTopLevel}
     * before it is compiled, and the result is evaluated against the document as a node set.
     *
     * @param doc a W3C document
     * @param extractedXpath an xpath
     * @return a node list with the details of all the elements that match the given xpath
     * @throws XPathExpressionException if the xpath cannot be compiled or evaluated
     */
    public static NodeList findAllByXPath(final Document doc, final String extractedXpath)
            throws XPathExpressionException {
        final XPathFactory xPathFactory = XPathFactory.newInstance();
        final XPath xpath = xPathFactory.newXPath();
        final XPathExpression expr = xpath.compile(XpathUtils.insideTopLevel(extractedXpath));
        return (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy