/*
 * org.netarchivesuite.heritrix3wrapper.xmlutils.XmlValidator Maven / Gradle / Ivy
 * Go to download
 * Show more of this group. Show more artifacts with this name.
 * Show all versions of heritrix3-wrapper. Show documentation.
 * Wrapper library for unpacking and communicating with Heritrix 3.
 */
package org.netarchivesuite.heritrix3wrapper.xmlutils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Implements an XML validator for well-formed-ness and against DTD/XSD if they are defined.
 * <p>
 * Two {@link DocumentBuilder}s are used: a non-validating one for the well-formedness pass and a
 * validating one (XML Schema language enabled) for the DTD/XSD pass. Both are created lazily on
 * first use and cached in the instance; before each use the cached builder is reset, or replaced
 * if it does not support {@link DocumentBuilder#reset()}.
 * <p>
 * This class performs no synchronization of its own around the cached builders.
 * NOTE(review): presumably an instance should therefore not be shared between threads - confirm.
 */
public class XmlValidator {

    /** Logging mechanism. */
    private static final Logger logger = LoggerFactory.getLogger(XmlValidator.class.getName());

    /** Schema validation enabler name. */
    public static final String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";

    /** Schema validation enabler value. */
    public static final String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema";

    /** Cached document builder factory without DTD/Schema validation. */
    private final DocumentBuilderFactory factoryParsing;

    /** Cached document builder without DTD/Schema validation, created lazily. */
    private DocumentBuilder builderParsing;

    /** Cached document builder factory with DTD/Schema validation. */
    private final DocumentBuilderFactory factoryValidating;

    /** Cached document builder with DTD/Schema validation, created lazily. */
    private DocumentBuilder builderValidating;

    /**
     * Construct an XmlValidator instance.
     * Only the two namespace-aware factories are configured here; the document builders
     * themselves are created on first use.
     */
    public XmlValidator() {
        factoryParsing = DocumentBuilderFactory.newInstance();
        factoryParsing.setNamespaceAware(true);
        factoryParsing.setValidating(false);
        factoryValidating = DocumentBuilderFactory.newInstance();
        factoryValidating.setNamespaceAware(true);
        factoryValidating.setValidating(true);
        factoryValidating.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
    }

    /**
     * Validate XML document for well-formed-ness and also against any DTD/XSD found.
     * <p>
     * The file is first parsed without validation. If that parse finds a DOCTYPE system id or
     * XSD references, the file is opened a second time and parsed with validation enabled;
     * only then is {@code bValid} assigned from the error handler's state. Any exception raised
     * along the way is logged, reported to the error handler as an error, and both
     * {@code bWellformed} and {@code bValid} are set to {@code false}.
     * @param xmlFile xml file
     * @param entityResolver XML entity resolver or null
     * @param errorHandler error handler or null, in which case a new {@code XmlErrorHandler} is used;
     *        the handler is reset before parsing
     * @return XML validation result
     * @throws IllegalArgumentException if {@code xmlFile} is null
     * @throws IllegalStateException if an exception could not be reported to the error handler
     */
    public XmlValidationResult validate(File xmlFile, EntityResolver entityResolver, XmlErrorHandlerAbstract errorHandler) {
        if (xmlFile == null) {
            throw new IllegalArgumentException("xmlFile is null");
        }
        XmlValidationResult result = new XmlValidationResult();
        if (errorHandler == null) {
            errorHandler = new XmlErrorHandler();
        }
        errorHandler.reset();
        InputStream in = null;
        try {
            /*
             * Test for well-formed-ness.
             */
            in = new FileInputStream(xmlFile);
            parseWithoutValidation(in, entityResolver, errorHandler, result);
            in.close();
            in = null;
            result.bWellformed = !errorHandler.hasErrors();
            /*
             * Look for references to DTD or XSD.
             */
            detectDefinitionReferences(result);
            /*
             * Validate against DTD/XSD.
             */
            if (result.bDtdUsed || result.bXsdUsed) {
                // The first stream has been consumed, so the file is opened again.
                in = new FileInputStream(xmlFile);
                parseWithValidation(in, entityResolver, errorHandler, result);
                in.close();
                in = null;
                result.bValid = !errorHandler.hasErrors();
            }
        } catch (Throwable t) {
            reportException(t, errorHandler);
            result.bWellformed = false;
            result.bValid = false;
        } finally {
            closeLoggingFailure(in);
        }
        return result;
    }

    /**
     * Test XML document for well-formed-ness and look for DTD/XSD references.
     * Closes the input stream before returning.
     * <p>
     * Both the error handler and {@code result} are reset before parsing. Any exception raised
     * while parsing is logged and reported to the error handler as an error.
     * @param in XML input stream
     * @param entityResolver XML entity resolver or null
     * @param errorHandler error handler or null, in which case a new {@code XmlErrorHandler} is used
     * @param result validation results
     * @return result of testing, true if the document is well-formed
     * @throws IllegalArgumentException if {@code in} or {@code result} is null
     * @throws IllegalStateException if an exception could not be reported to the error handler
     */
    public boolean testStructuralValidity(InputStream in, EntityResolver entityResolver, XmlErrorHandlerAbstract errorHandler, XmlValidationResult result) {
        if (in == null) {
            throw new IllegalArgumentException("in is null");
        }
        if (result == null) {
            throw new IllegalArgumentException("result is null");
        }
        if (errorHandler == null) {
            errorHandler = new XmlErrorHandler();
        }
        errorHandler.reset();
        result.reset();
        try {
            /*
             * Test for well-formed-ness.
             */
            parseWithoutValidation(in, entityResolver, errorHandler, result);
            in.close();
            in = null;
            /*
             * Look for references to DTD or XSD.
             */
            detectDefinitionReferences(result);
        } catch (Throwable t) {
            reportException(t, errorHandler);
        } finally {
            closeLoggingFailure(in);
        }
        result.bWellformed = !errorHandler.hasErrors();
        return result.bWellformed;
    }

    /**
     * Validate XML document against any DTD/XSD found. Closes the input stream before returning.
     * <p>
     * Neither the error handler nor {@code result} is reset here, so errors already recorded in a
     * supplied handler also count against validity. When the document is found valid,
     * {@code bWellformed} is set to {@code true} as well.
     * @param in XML input stream
     * @param entityResolver XML entity resolver or null
     * @param errorHandler error handler or null, in which case a new {@code XmlErrorHandler} is used
     * @param result validation results
     * @return result of testing, true if the document was validated against DTD/XSD(s).
     * @throws IllegalArgumentException if {@code in} or {@code result} is null
     * @throws IllegalStateException if an exception could not be reported to the error handler
     */
    public boolean testDefinedValidity(InputStream in, EntityResolver entityResolver, XmlErrorHandlerAbstract errorHandler, XmlValidationResult result) {
        if (in == null) {
            throw new IllegalArgumentException("in is null");
        }
        if (result == null) {
            throw new IllegalArgumentException("result is null");
        }
        if (errorHandler == null) {
            errorHandler = new XmlErrorHandler();
        }
        try {
            /*
             * Validate against DTD/XSD.
             */
            parseWithValidation(in, entityResolver, errorHandler, result);
            in.close();
            in = null;
        } catch (Throwable t) {
            reportException(t, errorHandler);
        } finally {
            closeLoggingFailure(in);
        }
        result.bValid = !errorHandler.hasErrors();
        if (result.bValid) {
            result.bWellformed = true;
        }
        return result.bValid;
    }

    /**
     * Parse the stream with the non-validating builder and store the document in {@code result}.
     * The caller's entity resolver is applied to this pass too, so a DOCTYPE is resolved the same
     * way as in the validating pass; null selects the parser's default resolution.
     */
    private void parseWithoutValidation(InputStream in, EntityResolver entityResolver, XmlErrorHandlerAbstract errorHandler, XmlValidationResult result) throws SAXException, IOException {
        builderParsing = resetOrCreate(builderParsing, factoryParsing);
        builderParsing.setEntityResolver(entityResolver);
        builderParsing.setErrorHandler(errorHandler);
        result.document = builderParsing.parse(in);
    }

    /** Parse the stream with the validating builder and store the document in {@code result}. */
    private void parseWithValidation(InputStream in, EntityResolver entityResolver, XmlErrorHandlerAbstract errorHandler, XmlValidationResult result) throws SAXException, IOException {
        builderValidating = resetOrCreate(builderValidating, factoryValidating);
        builderValidating.setEntityResolver(entityResolver);
        builderValidating.setErrorHandler(errorHandler);
        result.document = builderValidating.parse(in);
    }

    /**
     * Return a builder ready for a new parse: the cached one after a successful reset, otherwise
     * a new one from the factory.
     * @throws IllegalStateException if the factory cannot create a builder
     */
    private static DocumentBuilder resetOrCreate(DocumentBuilder cached, DocumentBuilderFactory factory) {
        if (cached != null) {
            try {
                cached.reset();
                return cached;
            } catch (UnsupportedOperationException ignored) {
                // This implementation cannot be reset; replace it with a new builder below.
            }
        }
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            throw new IllegalStateException("Could not create a new 'DocumentBuilder'!", e);
        }
    }

    /**
     * Record in {@code result} which definitions the parsed document refers to.
     * A DOCTYPE system id marks the document as DTD based; only when there is none are the
     * elements searched for "xmlns:xsi" and "xsi:schemaLocation" attributes.
     */
    private static void detectDefinitionReferences(XmlValidationResult result) {
        DocumentType documentType = result.document.getDoctype();
        if (documentType != null) {
            result.systemId = documentType.getSystemId();
        }
        if (result.systemId != null) {
            result.bDtdUsed = true;
            return;
        }
        // "*" matches every element of the document, in document order.
        NodeList elements = result.document.getElementsByTagName("*");
        for (int i = 0; i < elements.getLength(); i++) {
            Node element = elements.item(i);
            Node attribute = element.getAttributes().getNamedItem("xmlns:xsi");
            if (attribute != null) {
                result.xsiNamespaces.add(attribute.getNodeValue());
                result.bXsdUsed = true;
            }
            attribute = element.getAttributes().getNamedItem("xsi:schemaLocation");
            if (attribute != null) {
                result.schemas.add(attribute.getNodeValue());
                result.bXsdUsed = true;
            }
        }
    }

    /**
     * Log an exception raised during parsing and hand it to the error handler as an error.
     * @throws IllegalStateException if the error handler itself throws
     */
    private static void reportException(Throwable t, XmlErrorHandlerAbstract errorHandler) {
        logger.error("Exception validating XML stream!", t);
        try {
            // No location is known for these failures, hence empty ids and -1 line/column.
            String publicId = "";
            String systemId = "";
            int lineNumber = -1;
            int columnNumber = -1;
            // SAXParseException only accepts an Exception as cause, so the Throwable is wrapped.
            Exception wrappedCause = new Exception("XML validation exception!", t);
            errorHandler.error(new SAXParseException(t.toString(), publicId, systemId, lineNumber, columnNumber, wrappedCause));
        } catch (SAXException e) {
            throw new IllegalStateException("Failed to add error to ErrorHandler!", e);
        }
    }

    /** Close the stream if it is still open; a failure to close is logged, not propagated. */
    private static void closeLoggingFailure(InputStream in) {
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                logger.error("Exception closing stream!", e);
            }
        }
    }

}