// net.sf.saxon.lib.StandardCollectionURIResolver Maven / Gradle / Ivy
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2014 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.lib;
import net.sf.saxon.Controller;
import net.sf.saxon.event.Builder;
import net.sf.saxon.event.PipelineConfiguration;
import net.sf.saxon.event.Receiver;
import net.sf.saxon.event.Sender;
import net.sf.saxon.expr.*;
import net.sf.saxon.functions.DocumentFn;
import net.sf.saxon.functions.ResolveURI;
import net.sf.saxon.functions.URIQueryParameters;
import net.sf.saxon.functions.UnparsedTextFunction;
import net.sf.saxon.om.*;
import net.sf.saxon.pattern.NodeKindTest;
import net.sf.saxon.trans.Err;
import net.sf.saxon.trans.Maker;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.iter.ArrayIterator;
import net.sf.saxon.tree.iter.AxisIterator;
import net.sf.saxon.tree.iter.SingletonIterator;
import net.sf.saxon.tree.tiny.Statistics;
import net.sf.saxon.tree.tiny.TinyBuilder;
import net.sf.saxon.tree.util.Navigator;
import net.sf.saxon.value.AnyURIValue;
import net.sf.saxon.value.ObjectValue;
import net.sf.saxon.value.TextFragmentValue;
import net.sf.saxon.value.Whitespace;
import net.sf.saxon.z.IntPredicate;
import org.xml.sax.XMLReader;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerException;
import javax.xml.transform.stream.StreamSource;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * This class implements the default collection URI Resolver.
 *
 * <p>This supports two implementations of collections. If the URI supplied uses the "file:/" scheme, and the
 * file that is referenced is a directory, then the collection is the set of files in that directory. Query
 * parameters may be included in the URI:</p>
 *
 * <ul>
 * <li><code>recurse=yes|no</code> controls whether the directory is scanned recursively;</li>
 * <li><code>strip-space=yes|no</code> determines whether whitespace text nodes are stripped from the
 * selected documents;</li>
 * <li><code>validation=strict|lax|preserve|strip</code> determines whether schema validation is applied;</li>
 * <li><code>select=pattern</code> determines which files in the directory are selected;</li>
 * <li><code>on-error=fail|warn|ignore</code> determines the action taken if processing of a file fails;</li>
 * <li><code>parser=qualified.class.name</code> selects the parser (XMLReader) to be used to read the files.</li>
 * </ul>
 *
 * <p>Otherwise, the resolver attempts to dereference the URI to obtain a catalog file. This is an XML file
 * containing a list of documents, in the format:</p>
 *
 * <pre>
 * &lt;collection&gt;
 *    &lt;doc href="doc1.xml"/&gt;
 *    &lt;doc href="doc2.xml"/&gt;
 * &lt;/collection&gt;
 * </pre>
 */
public class StandardCollectionURIResolver implements CollectionURIResolver {
/**
 * Resolve a collection URI to the sequence of documents (or document URIs) it identifies.
 *
 * <p>Any query part of {@code href} is split off and parsed into {@link URIQueryParameters}; the
 * remainder is made absolute using {@link #makeAbsoluteURI}. If the result is a {@code file:} URI
 * that names an existing directory, the collection is built from that directory by
 * {@link #directoryContents}; in every other case the URI is treated as the location of a
 * catalog file and handed to {@link #catalogContents}.</p>
 *
 * @param href    The relative URI of the collection. This corresponds to the
 *                argument supplied to the collection() function. If the collection() function
 *                was called with no arguments (to get the "default collection") this argument
 *                will be null.
 * @param base    The base URI that should be used. This is the base URI of the
 *                static context in which the call to collection() was made, typically the URI
 *                of the stylesheet or query module
 * @param context the dynamic evaluation context; it supplies the configuration used to parse
 *                the query parameters and is attached to every error raised here
 * @return an Iterator over the documents in the collection. The items returned
 *         by this iterator must be instances either of xs:anyURI, or of node() (specifically,
 *         {@link net.sf.saxon.om.NodeInfo}.). If xs:anyURI values are returned, the corresponding
 *         document will be retrieved as if by a call to the doc() function: this means that
 *         the system first checks to see if the document is already loaded, and if not, calls
 *         the registered URIResolver to dereference the URI. This is the recommended approach
 *         to ensure that the resulting collection is stable: however, it has the consequence
 *         that the documents will by default remain in memory for the duration of the query
 *         or transformation.
 * @throws XPathException with code FODC0002 if {@code href} is null or the referenced file or
 *                        directory does not exist; with code FODC0004 if {@code href} is not a
 *                        valid URI, cannot be made absolute, or is a {@code file:} URI that
 *                        cannot be mapped to a local file
 */
public SequenceIterator resolve(/*@Nullable*/ String href, String base, XPathContext context) throws XPathException {
    if (href == null) {
        XPathException err = new XPathException("No default collection has been defined");
        err.setErrorCode("FODC0002");
        err.setXPathContext(context);
        throw err;
    }

    URIQueryParameters params = null;
    URI relativeURI;
    try {
        relativeURI = new URI(ResolveURI.escapeSpaces(href));
        String query = relativeURI.getQuery();
        if (query != null) {
            // Separate the query parameters from the part of the URI that locates the collection
            params = new URIQueryParameters(query, context.getConfiguration());
            int q = href.indexOf('?');
            href = ResolveURI.escapeSpaces(href.substring(0, q));
            relativeURI = new URI(href);
        }
    } catch (URISyntaxException e) {
        XPathException err = new XPathException("Invalid relative URI " + Err.wrap(href, Err.VALUE) + " passed to collection() function");
        err.setErrorCode("FODC0004");
        err.setXPathContext(context);
        throw err;
    }

    URI resolvedURI = makeAbsoluteURI(href, base, context, relativeURI);

    if ("file".equals(resolvedURI.getScheme())) {
        File file;
        try {
            file = new File(resolvedURI);
        } catch (IllegalArgumentException e) {
            // java.io.File(URI) rejects file: URIs that are opaque or that carry an authority or
            // fragment component; report that as a collection error rather than an unchecked crash
            XPathException err = new XPathException(
                    "The URI " + resolvedURI + " cannot be mapped to a local file: " + e.getMessage());
            err.setErrorCode("FODC0004");
            err.setXPathContext(context);
            throw err;
        }
        if (!file.exists()) {
            XPathException err = new XPathException("The file or directory " + resolvedURI + " does not exist");
            err.setErrorCode("FODC0002");
            err.setXPathContext(context);
            throw err;
        }
        if (file.isDirectory()) {
            return directoryContents(file, params, context);
        }
    }
    // Not a directory: interpret the resource as a collection catalog
    return catalogContents(href, base, resolvedURI.toString(), context);
}
/**
 * Obtain the absolute URI of the collection.
 *
 * <p>If {@code relativeURI} is already absolute it is returned unchanged. Otherwise {@code href}
 * is resolved against {@code base}; when no base is supplied, the base is first passed through
 * {@code ResolveURI.tryToExpand} (presumably yielding a default such as the current directory --
 * TODO confirm against {@code ResolveURI}).</p>
 *
 * @param href        the collection URI as a string, with any query part already removed
 * @param base        the base URI from the static context, or null if none is available
 * @param context     the dynamic evaluation context, attached to any error raised
 * @param relativeURI {@code href} parsed as a {@link URI}
 * @return the absolute URI
 * @throws XPathException with code FODC0004 if the relative URI cannot be resolved
 */
protected URI makeAbsoluteURI(String href, String base, XPathContext context, URI relativeURI) throws XPathException {
    if (relativeURI.isAbsolute()) {
        return relativeURI;
    }
    if (base == null) {
        base = ResolveURI.tryToExpand(base);
    }
    try {
        return ResolveURI.makeAbsolute(href, base);
    } catch (URISyntaxException syntaxProblem) {
        XPathException failure = new XPathException("Cannot resolve relative URI: " + syntaxProblem.getMessage());
        failure.setErrorCode("FODC0004");
        failure.setXPathContext(context);
        throw failure;
    }
}
/**
* Return the contents of a collection that maps to a directory in filestore
*
* @param directory the directory to be processed
* @param params parameters indicating whether to process recursively, what to do on
* errors, and which files to select
* @param context the dynamic XPath evaluation context
* @return an iterator over the documents in the collection
*/
protected SequenceIterator directoryContents(File directory, URIQueryParameters params, XPathContext context) {
FilenameFilter filter = null;
if (params != null) {
FilenameFilter f = params.getFilenameFilter();
if (f != null) {
filter = f;
}
}
File[] files;
if (filter == null) {
files = directory.listFiles();
} else {
files = directory.listFiles(filter);
}
ObjectValue[] fileValues = new ObjectValue[files.length];
for (int f = 0; f < files.length; f++) {
fileValues[f] = new ObjectValue(files[f]);
}
// If the URI requested suppression of errors, or that errors should be treated
// as warnings, we set up a special ErrorListener to achieve this
int onError = URIQueryParameters.ON_ERROR_FAIL;
if (params != null && params.getOnError() != null) {
onError = params.getOnError();
}
final Controller controller = context.getController();
final PipelineConfiguration oldPipe = context.getConfiguration().makePipelineConfiguration();
oldPipe.setController(context.getController());
final PipelineConfiguration newPipe = new PipelineConfiguration(oldPipe);
final UnfailingErrorListener oldErrorListener =
controller == null ? new StandardErrorListener() : controller.getErrorListener();
if (onError == URIQueryParameters.ON_ERROR_IGNORE) {
newPipe.setErrorListener(new UnfailingErrorListener() {
public void warning(TransformerException exception) {
}
public void error(TransformerException exception) {
}
public void fatalError(TransformerException exception) {
}
});
} else if (onError == URIQueryParameters.ON_ERROR_WARNING) {
newPipe.setErrorListener(new UnfailingErrorListener() {
public void warning(TransformerException exception) {
oldErrorListener.warning(exception);
}
public void error(TransformerException exception) {
oldErrorListener.warning(exception);
XPathException supp = new XPathException("The document will be excluded from the collection");
supp.setLocator(exception.getLocator());
oldErrorListener.warning(supp);
}
public void fatalError(TransformerException exception) {
error(exception);
}
});
}
FileExpander expander = new FileExpander(params, newPipe);
SequenceIterator base = new ArrayIterator(fileValues);
return new MappingIterator, DocumentInfo>(base, expander);
}
/**
 * Return a collection defined as a list of URIs in a catalog file.
 *
 * <p>The catalog is parsed without schema validation. Its outermost element must be a
 * no-namespace {@code <collection>} element whose element children are no-namespace
 * {@code <doc>} elements, each with an {@code href} attribute that is resolved against the
 * base URI of the {@code <doc>} element. An optional {@code stable} attribute on
 * {@code <collection>} (default {@code true}) selects what the iterator delivers: when stable,
 * each document is returned as an xs:anyURI value; when not stable, each document is built
 * directly from its URI, bypassing the document pool and URI resolver.</p>
 *
 * @param href    the relative URI as supplied
 * @param baseURI the base URI
 * @param absURI  the absolute URI of the catalog file (used in error messages)
 * @param context the dynamic evaluation context
 * @return an iterator over the documents in the collection
 * @throws XPathException if any failures occur; structural problems in the catalog are
 *                        reported with error code FODC0004
 */
protected SequenceIterator catalogContents(String href, String baseURI, String absURI, final XPathContext context)
        throws XPathException {
    Source source = DocumentFn.resolveURI(href, baseURI, null, context);
    ParseOptions options = new ParseOptions();
    options.setSchemaValidationMode(Validation.SKIP);
    DocumentInfo catalog = context.getConfiguration().buildDocument(source, options);
    if (catalog == null) {
        // we failed to read the catalogue
        XPathException err = new XPathException("Failed to load collection catalog " + absURI);
        err.setErrorCode("FODC0004");
        err.setXPathContext(context);
        throw err;
    }

    // Locate and check the outermost element of the catalog
    AxisIterator iter = catalog.iterateAxis(AxisInfo.CHILD, NodeKindTest.ELEMENT);
    NodeInfo top = iter.next();
    if (top == null || !("collection".equals(top.getLocalPart()) && top.getURI().length() == 0)) {
        String message;
        if (top == null) {
            message = "No outermost element found in collection catalog";
        } else if (top.getURI().length() != 0) {
            message = "Collection catalog should not use a namespace";
        } else {
            message = "Collection catalog outermost element should be <collection> (found <"
                    + top.getLocalPart() + ">)";
        }
        XPathException err = new XPathException(message);
        err.setErrorCode("FODC0004");
        err.setXPathContext(context);
        throw err;
    }
    iter.close();

    boolean stable = true;
    String stableAtt = top.getAttributeValue("", "stable");
    if (stableAtt != null) {
        if ("true".equals(stableAtt)) {
            stable = true;
        } else if ("false".equals(stableAtt)) {
            stable = false;
        } else {
            XPathException err = new XPathException(
                    "The 'stable' attribute of element <collection> must be true or false");
            err.setErrorCode("FODC0004");
            err.setXPathContext(context);
            throw err;
        }
    }
    final boolean finalStable = stable;

    // Now return an iterator over the documents that the catalog refers to
    AxisIterator documents = top.iterateAxis(AxisInfo.CHILD, NodeKindTest.ELEMENT);

    ItemMappingFunction<NodeInfo, Item> catalogueMapper = new ItemMappingFunction<NodeInfo, Item>() {
        public Item mapItem(NodeInfo item) throws XPathException {
            if (!("doc".equals(item.getLocalPart()) &&
                    item.getURI().length() == 0)) {
                XPathException err = new XPathException("children of <collection> element must be <doc> elements");
                err.setErrorCode("FODC0004");
                err.setXPathContext(context);
                throw err;
            }
            String href = Navigator.getAttributeValue(item, "", "href");
            if (href == null) {
                XPathException err = new XPathException("\"<doc> element in catalog has no @href attribute\"");
                err.setErrorCode("FODC0004");
                err.setXPathContext(context);
                throw err;
            }
            String uri;
            try {
                uri = new URI(item.getBaseURI()).resolve(href).toString();
            } catch (URISyntaxException e) {
                XPathException err = new XPathException("Invalid base URI or href URI in collection catalog: ("
                        + item.getBaseURI() + ", " + href + ")");
                err.setErrorCode("FODC0004");
                err.setXPathContext(context);
                throw err;
            }
            if (finalStable) {
                return new AnyURIValue(uri);
            } else {
                // stability not required, bypass the document pool and URI resolver
                return context.getConfiguration().buildDocument(new StreamSource(uri));
            }
        }
    };

    return new ItemMappingIterator<NodeInfo, Item>(documents, catalogueMapper);
}
/**
* Mapping function to process the files in a directory. This maps a sequence of external
* objects representing files to a sequence of DocumentInfo nodes representing the parsed
* contents of those files.
*/
protected static class FileExpander implements MappingFunction, DocumentInfo> {
private URIQueryParameters params;
boolean recurse = false;
int strip = Whitespace.XSLT;
int validation = Validation.STRIP;
Boolean xinclude = null;
boolean unparsed;
Maker parserMaker = null;
int onError = URIQueryParameters.ON_ERROR_FAIL;
FilenameFilter filter = null;
PipelineConfiguration pipe;
public FileExpander(URIQueryParameters params, PipelineConfiguration pipe) {
this.params = params;
this.pipe = pipe;
if (params != null) {
FilenameFilter f = params.getFilenameFilter();
if (f != null) {
filter = f;
}
Boolean r = params.getRecurse();
if (r != null) {
recurse = r;
}
Integer v = params.getValidationMode();
if (v != null) {
validation = v;
}
xinclude = params.getXInclude();
strip = params.getStripSpace();
if (strip == Whitespace.UNSPECIFIED) {
strip = Whitespace.XSLT;
}
unparsed = params.isUnparsed();
Integer e = params.getOnError();
if (e != null) {
onError = e;
}
Maker p = params.getXMLReaderMaker();
if (p != null) {
parserMaker = p;
}
}
}
/**
* Map one item to a sequence.
*
* @param item The item to be mapped.
* If context is supplied, this must be the same as context.currentItem().
* @return either (a) a SequenceIterator over the sequence of items that the supplied input
* item maps to, or (b) an Item if it maps to a single item, or (c) null if it maps to an empty
* sequence.
*/
public SequenceIterator map(ObjectValue item) throws XPathException {
File file = item.getObject();
if (file.isDirectory()) {
if (recurse) {
File[] files;
if (filter == null) {
files = file.listFiles();
} else {
files = file.listFiles(filter);
}
ObjectValue[] fileValues = new ObjectValue[files.length];
for (int f = 0; f < files.length; f++) {
fileValues[f] = new ObjectValue(files[f]);
}
FileExpander expander = new FileExpander(params, pipe);
return new MappingIterator, DocumentInfo>(new ArrayIterator(fileValues), expander);
} else {
return null;
}
} else if (unparsed) {
try {
Reader reader = new FileReader(file);
IntPredicate checker = pipe.getConfiguration().getValidCharacterChecker();
CharSequence content = UnparsedTextFunction.readFile(checker, reader, null);
String uri = file.toURI().toString();
TextFragmentValue doc = new TextFragmentValue(pipe.getConfiguration(), content, uri);
doc.setSystemId(file.toURI().toString());
doc.setConfiguration(pipe.getConfiguration());
return SingletonIterator.makeIterator(doc);
} catch (IOException err) {
if (onError == URIQueryParameters.ON_ERROR_IGNORE) {
return null;
} else if (onError == URIQueryParameters.ON_ERROR_WARNING) {
XPathException warn = new XPathException("Failed to read " + file.getPath(), err);
pipe.getErrorListener().warning(warn);
XPathException supp = new XPathException("The document will be excluded from the collection");
pipe.getErrorListener().warning(supp);
return null;
} else {
throw new XPathException("Failed to read " + file.getPath(), err);
}
}
} else {
try {
String escaped = file.toURI().toASCIIString();
Source source = new StreamSource(escaped);
ParseOptions options = new ParseOptions(pipe.getParseOptions()); // bug 2273
if (validation != Validation.STRIP && validation != Validation.PRESERVE) {
options.setSchemaValidationMode(validation);
}
if (xinclude != null) {
options.setXIncludeAware(xinclude);
}
if (parserMaker != null) {
options.setXMLReaderMaker(parserMaker);
}
if (params != null) {
int stripSpace = params.getStripSpace();
if (stripSpace == Whitespace.UNSPECIFIED) {
stripSpace = Whitespace.XSLT;
}
options.setStripSpace(stripSpace);
}
if (pipe.getController() != null) {
Builder b = pipe.getController().makeBuilder();
if (b instanceof TinyBuilder) {
((TinyBuilder) b).setStatistics(Statistics.SOURCE_DOCUMENT_STATISTICS);
}
Receiver s = b;
if (pipe.getController().getExecutable().stripsInputTypeAnnotations()) {
s = pipe.getConfiguration().getAnnotationStripper(s);
}
s.setPipelineConfiguration(pipe);
Sender.send(source, s, options);
NodeInfo node = b.getCurrentRoot();
b.reset();
return SingletonIterator.makeIterator(node);
} else {
DocumentInfo doc = pipe.getConfiguration().buildDocument(source, options);
return SingletonIterator.makeIterator(doc);
}
} catch (XPathException err) {
if (onError == URIQueryParameters.ON_ERROR_IGNORE) {
return null;
} else if (onError == URIQueryParameters.ON_ERROR_WARNING) {
if (!err.hasBeenReported()) {
pipe.getErrorListener().warning(err);
XPathException supp = new XPathException("The document will be excluded from the collection");
supp.setLocator(err.getLocator());
pipe.getErrorListener().warning(supp);
}
return null;
} else {
throw err;
}
}
}
}
}
}