// org.exist.xquery.modules.file.Sync Maven / Gradle / Ivy
/*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
* info@exist-db.org
* http://www.exist-db.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.exist.xquery.modules.file;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import java.net.URISyntaxException;
import java.util.*;
import java.util.stream.Stream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamSource;
import org.apache.tools.ant.DirectoryScanner;
import org.exist.collections.Collection;
import org.exist.dom.persistent.BinaryDocument;
import org.exist.dom.persistent.DocumentImpl;
import org.exist.dom.QName;
import org.exist.dom.memtree.MemTreeBuilder;
import org.exist.security.PermissionDeniedException;
import org.exist.storage.lock.Lock.LockMode;
import org.exist.storage.lock.ManagedLock;
import org.exist.storage.serializers.EXistOutputKeys;
import org.exist.storage.serializers.Serializer;
import org.exist.util.FileUtils;
import org.exist.util.LockException;
import org.exist.util.serializer.SAXSerializer;
import org.exist.util.serializer.SerializerPool;
import org.exist.xmldb.XmldbURI;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.Cardinality;
import org.exist.xquery.ErrorCodes;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.functions.map.AbstractMapType;
import org.exist.xquery.util.SerializerUtils;
import org.exist.xquery.value.*;
import org.exist.xslt.TransformerFactoryAllocator;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import uk.ac.ic.doc.slurp.multilock.MultiLock;
public class Sync extends BasicFunction {
/** Option key: delete any file/dir on disk that does not correspond to a doc/collection in the DB. */
public static final String PRUNE_OPT = "prune";
/** Option key: only resources modified after this date are taken into account. */
public static final String AFTER_OPT = "after";
/** Option key: files on the file system matching any of these patterns are left untouched. */
public static final String EXCLUDES_OPT = "excludes";
// Elements of the report document; all of them live in the file module namespace.
public static final QName FILE_SYNC_ELEMENT = new QName("sync", FileModule.NAMESPACE_URI);
public static final QName FILE_UPDATE_ELEMENT = new QName("update", FileModule.NAMESPACE_URI);
public static final QName FILE_DELETE_ELEMENT = new QName("delete", FileModule.NAMESPACE_URI);
public static final QName FILE_ERROR_ELEMENT = new QName("error", FileModule.NAMESPACE_URI);
// TODO(JL) Figure out which namespace all attributes should be in (possible breaking change)
// https://github.com/eXist-db/exist/issues/4207
// Attributes written on the report root element (file module namespace).
public static final QName FILE_COLLECTION_ATTRIBUTE = new QName("collection", FileModule.NAMESPACE_URI);
public static final QName FILE_DIR_ATTRIBUTE = new QName("dir", FileModule.NAMESPACE_URI);
// Attributes written on the update/delete report entries (no namespace).
public static final QName FILE_ATTRIBUTE = new QName("file", XMLConstants.NULL_NS_URI);
public static final QName NAME_ATTRIBUTE = new QName("name", XMLConstants.NULL_NS_URI);
public static final QName COLLECTION_ATTRIBUTE = new QName("collection", XMLConstants.NULL_NS_URI);
public static final QName TYPE_ATTRIBUTE = new QName("type", XMLConstants.NULL_NS_URI);
public static final QName MODIFIED_ATTRIBUTE = new QName("modified", XMLConstants.NULL_NS_URI);
/**
 * Signature of file:sync($collection, $targetPath, $dateTimeOrOptionsMap).
 * The description strings are user-facing function documentation, so each
 * concatenated fragment ends with a space to keep the sentences apart.
 */
public static final FunctionSignature signature =
    new FunctionSignature(
        new QName("sync", FileModule.NAMESPACE_URI, FileModule.PREFIX),
        "Synchronize a collection with a directory hierarchy. " +
            "This method is only available to the DBA role.",
        new SequenceType[]{
            new FunctionParameterSequenceType("collection", Type.STRING, Cardinality.EXACTLY_ONE,
                "Absolute path to the collection to synchronize to disk."),
            new FunctionParameterSequenceType("targetPath", Type.ITEM, Cardinality.EXACTLY_ONE,
                "The path or URI to the target directory. Relative paths resolve against EXIST_HOME."),
            new FunctionParameterSequenceType("dateTimeOrOptionsMap", Type.ITEM, Cardinality.ZERO_OR_ONE,
                "Options as map(*). The available settings are: " +
                    "\"" + PRUNE_OPT + "\": delete any file/dir that does not correspond to a doc/collection in the DB. " +
                    "\"" + AFTER_OPT + "\": only resources modified after this date will be taken into account. " +
                    "\"" + EXCLUDES_OPT + "\": files on the file system matching any of these patterns will be left untouched. " +
                    "(deprecated) If the third parameter is of type xs:dateTime, it is the same as setting the \"" + AFTER_OPT + "\" option.")
        },
        new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.EXACTLY_ONE, "A report (file:sync) which files and directories were updated (file:update) or deleted (file:delete).")
    );
/**
 * Serialization settings applied whenever the caller does not override them:
 * indented UTF-8 output with an XML declaration and unexpanded XIncludes.
 */
private static final Properties DEFAULT_PROPERTIES = new Properties();

static {
    DEFAULT_PROPERTIES.setProperty(OutputKeys.ENCODING, "UTF-8");
    DEFAULT_PROPERTIES.setProperty(OutputKeys.INDENT, "yes");
    DEFAULT_PROPERTIES.setProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
    DEFAULT_PROPERTIES.setProperty(EXistOutputKeys.EXPAND_XINCLUDES, "no");
}

/** Serialization properties used when writing XML documents; assigned while the options are read. */
private Properties outputProperties = new Properties();
/**
 * Creates the function instance; both arguments are handed to the
 * {@link BasicFunction} constructor unchanged.
 *
 * @param context   the XQuery context this function is evaluated in
 * @param signature the signature this instance is bound to
 */
public Sync(final XQueryContext context, final FunctionSignature signature) {
super(context, signature);
}
/**
 * Evaluates file:sync: verifies that the calling subject has the DBA role,
 * reads the three arguments and starts the synchronization.
 *
 * @param args            collection path, target path and the optional xs:dateTime / options map
 * @param contextSequence not used by this function
 * @return the report document produced by the synchronization
 * @throws XPathException if the caller is not a DBA or the options are invalid
 */
@Override
public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
    if (!context.getSubject().hasDbaRole()) {
        throw new XPathException(this, "Function file:sync is only available to the DBA role");
    }
    final String collectionPath = args[0].getStringValue();
    final String target = args[1].getStringValue();
    final Map<String, Sequence> options = getOptions(args[2]);
    return startSync(target, collectionPath, options);
}
/**
 * Normalizes the third argument of file:sync into an options map that always
 * contains the keys {@link #AFTER_OPT}, {@link #PRUNE_OPT} and {@link #EXCLUDES_OPT}.
 * As a side effect the serialization properties for this invocation are set.
 *
 * @param parameter empty, a single xs:dateTime (deprecated) or an options map
 * @return the normalized options
 * @throws XPathException (XPTY0004) if the parameter or one of its options has the wrong type
 */
private Map<String, Sequence> getOptions(final Sequence parameter) throws XPathException {
    final Map<String, Sequence> options = new HashMap<>();
    options.put(AFTER_OPT, Sequence.EMPTY_SEQUENCE);
    options.put(PRUNE_OPT, new BooleanValue(this, false));
    options.put(EXCLUDES_OPT, Sequence.EMPTY_SEQUENCE);

    // Always start from the defaults: this keeps the deprecated xs:dateTime form
    // from running with empty properties or with those of a previous invocation.
    outputProperties = DEFAULT_PROPERTIES;

    if (parameter.isEmpty()) {
        return options;
    }

    final Item item = parameter.itemAt(0);
    if (item.getType() == Type.MAP) {
        final AbstractMapType optionsMap = (AbstractMapType) item;
        outputProperties = SerializerUtils.getSerializationOptions(this, optionsMap);
        // override defaults set in SerializerUtils
        for (final String p : DEFAULT_PROPERTIES.stringPropertyNames()) {
            if (optionsMap.get(new StringValue(this, p)).isEmpty()) {
                outputProperties.setProperty(p, DEFAULT_PROPERTIES.getProperty(p));
            }
        }

        final Sequence seq = optionsMap.get(new StringValue(this, EXCLUDES_OPT));
        if (!seq.isEmpty() && seq.getItemType() != Type.STRING) {
            throw new XPathException(this, ErrorCodes.XPTY0004,
                    "Invalid value for option \"excludes\", expected xs:string* got " +
                            Type.getTypeName(seq.getItemType()));
        }
        options.put(EXCLUDES_OPT, seq);

        checkOption(optionsMap, PRUNE_OPT, Type.BOOLEAN, options);
        checkOption(optionsMap, AFTER_OPT, Type.DATE_TIME, options);
    } else if (item.getType() == Type.DATE_TIME) {
        // deprecated form: the third argument is the "after" date itself
        options.put(AFTER_OPT, parameter);
    } else {
        throw new XPathException(this, ErrorCodes.XPTY0004,
                "Invalid 3rd parameter, allowed parameter types are xs:dateTime or map(*) got " + Type.getTypeName(item.getType()));
    }
    return options;
}
/**
 * Copies a single-valued option from the user supplied map into {@code options}
 * after checking its type. An absent (empty) option leaves {@code options} untouched.
 *
 * @param optionsMap   the map passed to file:sync
 * @param name         the option key to look up
 * @param expectedType the {@link Type} constant the value must be a subtype of
 * @param options      the normalized options to store the value in
 * @throws XPathException (XPTY0004) if the value has more than one item or the wrong type
 */
private void checkOption(
        final AbstractMapType optionsMap,
        final String name,
        final int expectedType,
        final Map<String, Sequence> options
) throws XPathException {
    final Sequence p = optionsMap.get(new StringValue(this, name));
    if (p.isEmpty()) {
        return; // nothing to do, continue
    }
    if (p.hasMany() || !Type.subTypeOf(p.getItemType(), expectedType)) {
        throw new XPathException(this, ErrorCodes.XPTY0004,
                "Invalid value type for option \"" + name + "\", expected " +
                        Type.getTypeName(expectedType) + " got " +
                        Type.getTypeName(p.itemAt(0).getType()));
    }
    options.put(name, p);
}
/**
 * Resolves the target directory, runs the synchronization for the whole
 * collection tree and builds the file:sync report document.
 *
 * @param target         path or URI of the target directory; relative paths are resolved against the eXist home
 * @param collectionPath path of the collection to write to disk
 * @param options        normalized options containing {@link #AFTER_OPT}, {@link #PRUNE_OPT} and {@link #EXCLUDES_OPT}
 * @return the report document
 * @throws XPathException if the target cannot be resolved, or a permission or lock problem occurs
 */
private Sequence startSync(
        final String target,
        final String collectionPath,
        final Map<String, Sequence> options
) throws XPathException {
    // Take the single item out of each sequence before casting, so a one-item
    // sequence that is not the atomic value itself does not fail the cast.
    final Sequence after = options.get(AFTER_OPT);
    final Date startDate = after.hasOne() ? ((DateTimeValue) after.itemAt(0)).getDate() : null;
    final boolean prune = ((BooleanValue) options.get(PRUNE_OPT).itemAt(0)).getValue();

    final List<String> excludes = new ArrayList<>();
    for (final SequenceIterator si = options.get(EXCLUDES_OPT).iterate(); si.hasNext(); ) {
        excludes.add(si.nextItem().getStringValue());
    }

    final Path p = FileModuleHelper.getFile(target, this);

    context.pushDocumentContext();
    final MemTreeBuilder output = context.getDocumentBuilder();
    try {
        final Path targetDir;
        if (p.isAbsolute()) {
            targetDir = p;
        } else {
            final Optional<Path> home = context.getBroker().getConfiguration().getExistHome();
            targetDir = FileUtils.resolve(home, target);
        }

        final String rootTargetAbsPath = targetDir.toAbsolutePath().toString();

        output.startDocument();
        output.startElement(FILE_SYNC_ELEMENT, null);
        output.addAttribute(FILE_COLLECTION_ATTRIBUTE, collectionPath);
        output.addAttribute(FILE_DIR_ATTRIBUTE, rootTargetAbsPath);

        // the root path handed down always ends with a separator
        final String separator = rootTargetAbsPath.endsWith(File.separator) ? "" : File.separator;
        syncCollection(XmldbURI.create(collectionPath), rootTargetAbsPath + separator, targetDir, startDate, prune, excludes, output);

        output.endElement();
        output.endDocument();
    } catch (final PermissionDeniedException | LockException e) {
        throw new XPathException(this, e);
    } finally {
        context.popDocumentContext();
    }
    return output.getDocument();
}
/**
 * Writes one collection to its target directory and then recurses into every
 * child collection. Problems creating or writing the directory are added to
 * the report and end the processing of this subtree.
 *
 * @param collectionPath    the collection to write
 * @param rootTargetAbsPath absolute path of the root target directory, ending with a separator
 * @param targetDir         the directory corresponding to {@code collectionPath}
 * @param startDate         only resources modified after this date are written; may be null
 * @param prune             whether entries on disk without a counterpart in the collection are deleted
 * @param excludes          patterns of paths that must not be pruned
 * @param output            builder of the report document
 */
private void syncCollection(
        final XmldbURI collectionPath,
        final String rootTargetAbsPath,
        final Path targetDir,
        final Date startDate,
        final boolean prune,
        final List<String> excludes,
        final MemTreeBuilder output
) throws PermissionDeniedException, LockException {
    final Path targetDirectory;
    try {
        targetDirectory = Files.createDirectories(targetDir);
    } catch (final IOException ioe) {
        reportError(output, "Failed to create output directory: " + targetDir.toAbsolutePath() +
                " for collection " + collectionPath);
        return;
    }

    if (!Files.isWritable(targetDirectory)) {
        reportError(output, "Failed to write to output directory: " + targetDirectory.toAbsolutePath());
        return;
    }

    final List<XmldbURI> subCollections = handleCollection(collectionPath, rootTargetAbsPath, targetDirectory, startDate, prune, excludes, output);

    for (final XmldbURI childURI : subCollections) {
        final Path childDir = targetDirectory.resolve(childURI.lastSegment().toString());
        syncCollection(collectionPath.append(childURI), rootTargetAbsPath, childDir, startDate, prune, excludes, output);
    }
}
/**
 * Opens the collection with a read lock, optionally prunes the target
 * directory, saves every document of the collection and collects the URIs of
 * the child collections so the caller can recurse after the collection has
 * been closed again.
 *
 * @return the child collection URIs as delivered by the collection iterator;
 *         an empty list if the collection does not exist (which is reported as an error)
 */
private List<XmldbURI> handleCollection(
        final XmldbURI collectionPath,
        final String rootTargetAbsPath,
        final Path targetDirectory,
        final Date startDate,
        final boolean prune,
        final List<String> excludes,
        final MemTreeBuilder output
) throws PermissionDeniedException, LockException {
    try (final Collection collection = context.getBroker().openCollection(collectionPath, LockMode.READ_LOCK)) {
        if (collection == null) {
            reportError(output, "Collection not found: " + collectionPath);
            return Collections.emptyList();
        }

        if (prune) {
            pruneCollectionEntries(collection, rootTargetAbsPath, targetDirectory, excludes, output);
        }

        for (final Iterator<DocumentImpl> i = collection.iterator(context.getBroker()); i.hasNext(); ) {
            final DocumentImpl doc = i.next();
            final Path targetFile = targetDirectory.resolve(doc.getFileURI().toASCIIString());
            saveFile(targetFile, doc, startDate, output);
        }

        final List<XmldbURI> subCollections = new ArrayList<>(collection.getChildCollectionCount(context.getBroker()));
        for (final Iterator<XmldbURI> i = collection.collectionIterator(context.getBroker()); i.hasNext(); ) {
            subCollections.add(i.next());
        }
        return subCollections;
    }
}
/**
 * Deletes every direct entry of {@code targetDir} that is neither excluded
 * nor backed by a document or child collection of {@code collection}.
 * Deletions are added to the report as file:delete elements; failures are
 * added as file:error elements instead of being thrown.
 *
 * @param collection        the collection corresponding to {@code targetDir}
 * @param rootTargetAbsPath absolute path of the root target directory
 * @param targetDir         the directory whose direct entries are inspected
 * @param excludes          patterns of paths that must not be deleted
 * @param output            builder of the report document
 */
private void pruneCollectionEntries(
        final Collection collection,
        final String rootTargetAbsPath,
        final Path targetDir,
        final List<String> excludes,
        final MemTreeBuilder output) {
    // identical for every entry, so it is looked up once
    final String currentCollection = collection.getURI().getCollectionPath();

    // depth 1: the directory itself plus its direct entries
    try (final Stream<Path> fileStream = Files.walk(targetDir, 1)) {
        fileStream.forEach(path -> {
            try {
                // guard against deletion of output folder
                if (rootTargetAbsPath.startsWith(path.toString())) {
                    return;
                }

                if (isExcludedPath(rootTargetAbsPath, path, excludes)) {
                    return;
                }

                final String fileName = path.getFileName().toString();
                final XmldbURI dbname = XmldbURI.xmldbUriFor(fileName);

                // the last condition keeps the directory of the current collection itself
                if (collection.hasDocument(context.getBroker(), dbname)
                        || collection.hasChildCollection(context.getBroker(), dbname)
                        || currentCollection.endsWith("/" + fileName)) {
                    return;
                }

                // handle non-empty directories
                if (Files.isDirectory(path)) {
                    deleteWithExcludes(rootTargetAbsPath, path, excludes, output);
                } else {
                    Files.deleteIfExists(path);
                    // reporting
                    output.startElement(FILE_DELETE_ELEMENT, null);
                    output.addAttribute(FILE_ATTRIBUTE, path.toAbsolutePath().toString());
                    output.addAttribute(NAME_ATTRIBUTE, fileName);
                    output.endElement();
                }
            } catch (final IOException | URISyntaxException
                    | PermissionDeniedException | LockException e) {
                reportError(output, e.getMessage());
            }
        });
    } catch (final IOException | UncheckedIOException e) {
        // the lazily evaluated walk reports read failures as UncheckedIOException
        reportError(output, e.getMessage());
    }
}
/**
 * Writes a single document to {@code targetFile} unless it is older than
 * {@code startDate} or the file on disk is at least as recent as the document.
 * A file:update element is added to the report before the content is written;
 * failures end up in the report as file:error elements.
 *
 * @param targetFile the file to write
 * @param doc        the document to save
 * @param startDate  lower bound for the modification date; may be null
 * @param output     builder of the report document
 */
private void saveFile(final Path targetFile, final DocumentImpl doc, final Date startDate, final MemTreeBuilder output) throws LockException {
    final boolean unchangedInPeriod = startDate != null && doc.getLastModified() <= startDate.getTime();
    if (unchangedInPeriod) {
        return;
    }

    try (final ManagedLock<?> docLock = context.getBroker().getBrokerPool().getLockManager().acquireDocumentReadLock(doc.getURI())) {
        if (Files.exists(targetFile)) {
            final FileTime onDisk = Files.getLastModifiedTime(targetFile);
            final FileTime inDb = FileTime.fromMillis(doc.getLastModified());
            if (onDisk.compareTo(inDb) >= 0) {
                // nothing newer in the database than what is already on disk
                return;
            }
        }

        output.startElement(FILE_UPDATE_ELEMENT, null);
        output.addAttribute(FILE_ATTRIBUTE, targetFile.toAbsolutePath().toString());
        output.addAttribute(NAME_ATTRIBUTE, doc.getFileURI().toString());
        output.addAttribute(COLLECTION_ATTRIBUTE, doc.getCollection().getURI().toString());
        final Date modified = new Date(doc.getLastModified());
        output.addAttribute(MODIFIED_ATTRIBUTE, new DateTimeValue(this, modified).getStringValue());

        final boolean binary = doc.getResourceType() == DocumentImpl.BINARY_FILE;
        output.addAttribute(TYPE_ATTRIBUTE, binary ? "binary" : "xml");
        output.endElement();

        if (binary) {
            saveBinary(targetFile, (BinaryDocument) doc, output);
        } else {
            saveXML(targetFile, doc, output);
        }
    } catch (final XPathException e) {
        reportError(output, e.getMessage());
    } catch (final IOException e) {
        reportError(output, "IO error while saving file: " + targetFile.toAbsolutePath().toString());
    }
}
/**
 * Serializes an XML document to {@code targetFile} as UTF-8 using the current
 * output properties. An already existing repo.xml is not overwritten directly
 * but handed to {@link #processRepoDesc}. SAX errors are added to the report.
 *
 * @param targetFile the file to write
 * @param doc        the XML document to serialize
 * @param output     builder of the report document
 * @throws IOException if the target file cannot be opened or written
 */
private void saveXML(final Path targetFile, final DocumentImpl doc, final MemTreeBuilder output) throws IOException {
    final SerializerPool pool = SerializerPool.getInstance();
    final SAXSerializer sax = (SAXSerializer) pool.borrowObject(SAXSerializer.class);
    try {
        if (Files.exists(targetFile) && FileUtils.fileName(targetFile).equals("repo.xml")) {
            processRepoDesc(targetFile, doc, sax, output);
            return;
        }

        final Serializer serializer = context.getBroker().borrowSerializer();
        try (final Writer writer = new OutputStreamWriter(new BufferedOutputStream(Files.newOutputStream(targetFile)), StandardCharsets.UTF_8)) {
            sax.setOutput(writer, outputProperties);
            serializer.setProperties(outputProperties);
            serializer.setSAXHandlers(sax, sax);
            serializer.toSAX(doc);
        } finally {
            context.getBroker().returnSerializer(serializer);
        }
    } catch (final SAXException e) {
        final String file = targetFile.toAbsolutePath().toString();
        reportError(output, "SAX exception while saving file " + file + ": " + e.getMessage());
    } finally {
        pool.returnObject(sax);
    }
}
/**
 * Merge repo.xml modified by user with original file. This is necessary because we have to
 * remove sensitive information during upload (default password) and need to restore it
 * when the package is synchronized back to disk.
 *
 * The existing file is parsed first, then overwritten with the result of running
 * the stored document through the bundled repo.xsl stylesheet, which receives the
 * root element of the previous file as parameter "original". Any failure is added
 * to the report instead of being thrown.
 *
 * @param targetFile the existing repo.xml on disk
 * @param doc        the repo.xml document stored in the database
 * @param sax        the borrowed SAX serializer that writes the result
 * @param output     builder of the report document
 */
private void processRepoDesc(final Path targetFile, final DocumentImpl doc, final SAXSerializer sax, final MemTreeBuilder output) {
    try {
        // NOTE(review): the parser runs with the factory defaults (no secure-processing
        // feature set); confirm that is acceptable for files found in the target directory.
        final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        final Document original = builder.parse(targetFile.toFile());

        final Serializer serializer = context.getBroker().borrowSerializer();
        // the stylesheet stream is part of the try-with-resources so it gets closed as well
        try (final InputStream stylesheet = Sync.class.getResourceAsStream("repo.xsl");
             final Writer writer = new OutputStreamWriter(new BufferedOutputStream(Files.newOutputStream(targetFile)), StandardCharsets.UTF_8)) {
            sax.setOutput(writer, outputProperties);

            final StreamSource styleSource = new StreamSource(stylesheet);
            final SAXTransformerFactory factory = TransformerFactoryAllocator.getTransformerFactory(context.getBroker().getBrokerPool());
            final TransformerHandler handler = factory.newTransformerHandler(styleSource);
            handler.getTransformer().setParameter("original", original.getDocumentElement());
            handler.setResult(new SAXResult(sax));

            serializer.reset();
            serializer.setProperties(outputProperties);
            serializer.setSAXHandlers(handler, handler);
            serializer.toSAX(doc);
        } finally {
            context.getBroker().returnSerializer(serializer);
        }
    } catch (final ParserConfigurationException e) {
        reportError(output, "Parser exception while saving file " + targetFile.toAbsolutePath().toString() + ": " + e.getMessage());
    } catch (final SAXException e) {
        reportError(output, "SAX exception while saving file " + targetFile.toAbsolutePath().toString() + ": " + e.getMessage());
    } catch (final IOException e) {
        reportError(output, "IO exception while saving file " + targetFile.toAbsolutePath().toString() + ": " + e.getMessage());
    } catch (final TransformerException e) {
        reportError(output, "Transformation exception while saving file " + targetFile.toAbsolutePath().toString() + ": " + e.getMessage());
    }
}
/**
 * Copies the content of a binary document to {@code targetFile}, replacing an
 * existing file. Any exception is added to the report rather than propagated.
 *
 * @param targetFile the file to write
 * @param binary     the binary document to copy
 * @param output     builder of the report document
 */
private void saveBinary(final Path targetFile, final BinaryDocument binary, final MemTreeBuilder output) {
    try (final InputStream content = context.getBroker().getBinaryResource(binary)) {
        Files.copy(content, targetFile, StandardCopyOption.REPLACE_EXISTING);
    } catch (final Exception failure) {
        reportError(output, failure.getMessage());
    }
}
/**
 * Appends a file:error element with the given text to the report.
 *
 * @param output builder of the report document
 * @param msg    the text content of the error element
 */
private void reportError(final MemTreeBuilder output, final String msg) {
    // <file:error>msg</file:error>
    output.startElement(FILE_ERROR_ELEMENT, null);
    output.characters(msg);
    output.endElement();
}
/**
 * We need to convert to a relative path in relation to rootTargetAbsPath,
 * as all the exclusion patterns are relative to rootTargetAbsPath.
 *
 * @param rootTargetAbsPath the root target (abs)path
 * @param path (abs)path to check for being excluded. Should be subdir of rootTargetAbsPath
 * @param excludes exclude patterns (in the convention of DirectoryScanner.match)
 * @return true if the (rel)path in question is matched by some of the exclusion patterns;
 *         false for the root folder itself and for paths that are not below the root
 */
private static boolean isExcludedPath(final String rootTargetAbsPath, final Path path, final List<String> excludes) {
    if (excludes.isEmpty()) {
        return false;
    }

    // Use the absolute form for the guard and the substring alike, so a
    // relative path cannot pass the guard and then be cut at the wrong offset.
    final String absPath = path.toAbsolutePath().toString();

    // root folder cannot be excluded
    // path will then also be one character shorter than rootTargetAbsPath
    // and throw when attempting to construct the relative path
    if (rootTargetAbsPath.startsWith(absPath)) {
        return false;
    }

    // a path outside of the root has no relative form the patterns could apply to
    if (!absPath.startsWith(rootTargetAbsPath)) {
        return false;
    }

    final String relPath = absPath.substring(rootTargetAbsPath.length());
    final String normalizedPath = relPath.startsWith(File.separator)
            ? relPath.substring(File.separator.length())
            : relPath;

    return matchAny(excludes, normalizedPath);
}
/**
 * Check if any of the patterns matches the path.
 *
 * @param patterns the patterns, in the convention of {@code DirectoryScanner.match}
 * @param path     the path to test
 * @return true as soon as one pattern matches; false if none does or there are no patterns
 */
public static boolean matchAny(final Iterable<String> patterns, final String path) {
    for (final String pattern : patterns) {
        if (DirectoryScanner.match(pattern, path)) {
            return true;
        }
    }
    return false;
}
/**
 * Deletes {@code path}. A directory is removed by walking its tree with a
 * {@link DeleteDirWithExcludesVisitor}, which leaves excluded entries in
 * place; anything else is deleted directly if it exists.
 *
 * @param root     absolute path of the root target directory the exclude patterns are relative to
 * @param path     the file or directory to delete
 * @param excludes patterns of paths that must not be deleted
 * @param output   builder of the report document; used by the visitor only
 * @throws IOException if a deletion fails
 */
private static void deleteWithExcludes(final String root, final Path path, final List<String> excludes, final MemTreeBuilder output) throws IOException {
    if (Files.isDirectory(path)) {
        Files.walkFileTree(path, new DeleteDirWithExcludesVisitor(root, excludes, output));
    } else {
        Files.deleteIfExists(path);
    }
}
/**
 * File visitor that deletes a directory tree bottom-up while leaving every
 * excluded file or directory in place. Each deletion is added to the report
 * as a file:delete element.
 *
 * Whether a directory has to stay is tracked per directory: it is kept only
 * if it (transitively) contains an excluded entry. A single flag for the whole
 * walk would also keep unrelated directories visited later, making the result
 * depend on the traversal order.
 */
private static class DeleteDirWithExcludesVisitor extends SimpleFileVisitor<Path> {

    private final List<String> excludes;
    private final String root;
    private final MemTreeBuilder output;
    /** One entry per directory currently being visited; true if that directory must be kept. */
    private final Deque<Boolean> retained = new ArrayDeque<>();

    public DeleteDirWithExcludesVisitor(final String root, final List<String> excludes, final MemTreeBuilder output) {
        this.output = output;
        this.excludes = excludes;
        this.root = root;
    }

    @Override
    public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) {
        if (isExcludedPath(root, dir, excludes)) {
            // postVisitDirectory is not called for a skipped subtree,
            // so nothing is pushed for it; only its parent is marked
            retainCurrentDirectory();
            return FileVisitResult.SKIP_SUBTREE;
        }
        retained.push(Boolean.FALSE);
        return FileVisitResult.CONTINUE;
    }

    @Override
    public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
        if (isExcludedPath(root, file, excludes)) {
            retainCurrentDirectory();
            return FileVisitResult.CONTINUE;
        }

        Files.deleteIfExists(file);
        reportDeleted(file);
        return FileVisitResult.CONTINUE;
    }

    @Override
    public FileVisitResult postVisitDirectory(final Path dir, final IOException exc) throws IOException {
        if (exc != null) {
            throw exc;
        }

        final boolean keep = !retained.isEmpty() && retained.pop();
        if (keep) {
            // deletion would fail due to non-empty directory;
            // the parent is not empty either, so it has to stay as well
            retainCurrentDirectory();
            return FileVisitResult.CONTINUE;
        }

        Files.deleteIfExists(dir);
        reportDeleted(dir);
        return FileVisitResult.CONTINUE;
    }

    /** Marks the directory currently being visited (if any) as one that must not be deleted. */
    private void retainCurrentDirectory() {
        if (!retained.isEmpty()) {
            retained.pop();
            retained.push(Boolean.TRUE);
        }
    }

    /** Adds a file:delete element for the given path to the report. */
    private void reportDeleted(final Path deleted) {
        output.startElement(FILE_DELETE_ELEMENT, null);
        output.addAttribute(FILE_ATTRIBUTE, deleted.toAbsolutePath().toString());
        output.addAttribute(NAME_ATTRIBUTE, deleted.getFileName().toString());
        output.endElement();
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy