All downloads are free. Search and download functionality uses the official Maven repository.

org.fryske_akademy.exist.jobs.DataSyncTask Maven / Gradle / Ivy

package org.fryske_akademy.exist.jobs;

/*-
 * #%L
 * exist-db-addons
 * %%
 * Copyright (C) 2020 - 2021 Fryske Akademy
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.exist.EXistException;
import org.exist.collections.Collection;
import org.exist.collections.triggers.TriggerException;
import org.exist.dom.persistent.DocumentImpl;
import org.exist.security.PermissionDeniedException;
import org.exist.security.PermissionFactory;
import org.exist.security.SecurityManager;
import org.exist.source.StringSource;
import org.exist.storage.DBBroker;
import org.exist.storage.SystemTask;
import org.exist.storage.lock.Lock;
import org.exist.storage.txn.Txn;
import org.exist.util.Configuration;
import org.exist.util.FileInputSource;
import org.exist.util.LockException;
import org.exist.util.MimeTable;
import org.exist.util.MimeType;
import org.exist.xmldb.XmldbURI;
import org.exist.xquery.CompiledXQuery;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQuery;
import org.exist.xquery.XQueryContext;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.Properties;

/**
 * Synchronizes a directory on the filesystem into an eXist-db collection, recursively.
 *
 * <p>The source directory is read from the parameter {@link #DATADIR_PARAM} (default {@link #DATA_DIR}),
 * the target collection from the parameter {@link #COLLECTION_PARAM}. Collections that do not exist yet
 * are created. Files that are new, or newer than the document in the target collection, are stored in that
 * collection. Documents and collections that are not present in the source directory are removed from the
 * collection; this can be turned off via the boolean parameter {@link #REMOVE_FROM_COLLECTION_PARAM}.</p>
 *
 * <p>Owner and group can be provided in the parameters {@link #OWNERPARAM} and {@link #GROUPPARAM},
 * otherwise they are taken from the {@link #COLLECTION_PARAM root collection}. After syncing the cache is
 * cleared to prevent problems; this can be turned off via the boolean parameter {@link #CLEAR_CACHE_PARAM}.
 * Logging of created/updated documents at info level is controlled by {@link #LOGINFO_PARAM}.</p>
 *
 * <p>NOTE that the sync will partially succeed when during syncing an exception occurs, collections and
 * files added or removed before the exception will remain added/removed. Meant to be used as a start-up
 * task, {@link DataSyncTaskCron} is meant to be scheduled as a cronjob.</p>
 */
public class DataSyncTask implements SystemTask {

    private static final Logger LOG = LogManager.getLogger(DataSyncTask.class);

    /**
     * default data dir on filesystem (i.e. docker mount)
     */
    public static final String DATA_DIR = "/data";
    /** name of the (required) parameter holding the uri of the target collection */
    public static final String COLLECTION_PARAM = "collection";
    /** name of the parameter holding the source directory, defaults to {@link #DATA_DIR} */
    public static final String DATADIR_PARAM = "datadir";
    /** name of the boolean parameter that switches removal of resources missing in the source, default true */
    public static final String REMOVE_FROM_COLLECTION_PARAM = "removeNotInSource";
    /** name of the boolean parameter that switches info logging of created/updated documents, default true */
    public static final String LOGINFO_PARAM = "logInfo";
    /** name of the parameter holding the owner to set */
    public static final String OWNERPARAM = "owner";
    /** name of the parameter holding the group to set */
    public static final String GROUPPARAM = "group";
    /** uri prefix that is stripped before comparing collection uris */
    public static final String XMLDBPREFIX = "xmldb:exist://";
    /**
     * xquery executed after syncing to clear the cache
     */
    public static final String CLEAR_CACHE_XQ = "xquery version \"3.1\";\n" +
            "import module namespace cache = \"http://exist-db.org/xquery/cache\";\n" +
            "cache:clear()";
    // query templates: #c# = quoted collection uri, #o# = quoted owner, #g# = quoted group
    private static final String OWNER_QUERY = "xquery version \"3.1\";\n" +
            "import module namespace sm = \"http://exist-db.org/xquery/securitymanager\";\n" +
            "for $i in uri-collection(#c#) return sm:chown($i,#o#)";
    private static final String GROUP_QUERY = "xquery version \"3.1\";\n" +
            "import module namespace sm = \"http://exist-db.org/xquery/securitymanager\";\n" +
            "for $i in uri-collection(#c#) return sm:chgrp($i,#g#)";
    private static final String OWNER_GROUP_QUERY = "xquery version \"3.1\";\n" +
            "import module namespace sm = \"http://exist-db.org/xquery/securitymanager\";\n" +
            "for $i in uri-collection(#c#) return (sm:chown($i,#o#), sm:chgrp($i,#g#))";
    /** name of the boolean parameter that switches clearing the cache after syncing, default true */
    public static final String CLEAR_CACHE_PARAM = "clearCache";

    private boolean removeMissingInSource;
    private String rootCollection = null;
    private XmldbURI dataRoot;
    private Path sourcePath;

    private boolean clearCache = true;
    private String owner, group;
    private boolean logInfo = true;

    @Override
    public String getName() {
        return "Data Sync";
    }

    /**
     * Reads the task parameters.
     *
     * @param config     not used
     * @param properties the parameters, {@link #COLLECTION_PARAM} is required
     * @throws EXistException when {@link #COLLECTION_PARAM} is missing or is not a valid uri
     */
    @Override
    public void configure(Configuration config, Properties properties) throws EXistException {
        rootCollection = properties.getProperty(COLLECTION_PARAM);
        if (rootCollection == null) {
            // fail with a clear message instead of handing null to the uri parser
            throw new EXistException(String.format("You have to provide %s parameter in conf.xml", COLLECTION_PARAM));
        }
        try {
            dataRoot = XmldbURI.xmldbUriFor(rootCollection);
        } catch (URISyntaxException e) {
            throw new EXistException(e);
        }
        owner = properties.getProperty(OWNERPARAM, "");
        group = properties.getProperty(GROUPPARAM, "");
        removeMissingInSource = Boolean.parseBoolean(properties.getProperty(REMOVE_FROM_COLLECTION_PARAM, "true"));
        logInfo = Boolean.parseBoolean(properties.getProperty(LOGINFO_PARAM, "true"));
        sourcePath = new File(properties.getProperty(DATADIR_PARAM, DATA_DIR)).toPath();
        clearCache = Boolean.parseBoolean(properties.getProperty(CLEAR_CACHE_PARAM, "true"));
    }

    /**
     * Runs the sync: optionally removes resources missing in the source, walks the source directory
     * storing new and newer files, sets owner/group and finally closes the visited collections,
     * commits or aborts the transaction and clears the cache.
     *
     * @param broker      used for all database access
     * @param transaction committed when the walk completed without exception, aborted otherwise
     * @throws EXistException wrapping any exception that occurred
     */
    @Override
    public void execute(DBBroker broker, Txn transaction) throws EXistException {
        // collections opened (write locked) during the walk, closed again in the finally block
        final List<Collection> visited = new ArrayList<>();
        boolean success = false;
        try {
            if (rootCollection == null) {
                throw new EXistException(String.format("You have to provide %s parameter in conf.xml", COLLECTION_PARAM));
            }
            LOG.log(Level.INFO, String.format("start sync %s to %s", sourcePath, rootCollection));
            if (removeMissingInSource) {
                // remove files not present in the source folder
                try (Collection coll = broker.openCollection(dataRoot, Lock.LockMode.WRITE_LOCK)) {
                    removeNotInSource(coll, broker, transaction, dataRoot);
                }
            }
            Files.walkFileTree(sourcePath,
                    new FileVisitor<Path>() {

                        @Override
                        public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes basicFileAttributes) throws IOException {
                            try {
                                /*
                                 create and lock collection
                                 NOTE we cannot rely on any order of processing
                                 every directory we visit will become a collection and will be write locked
                                 */
                                XmldbURI uri = dataRoot.append(sourcePath.relativize(path).toString());
                                broker.getOrCreateCollection(transaction, uri);
                                // owner and group default to those of the root collection
                                if (group.isEmpty()) {
                                    Collection parent = broker.getCollection(dataRoot);
                                    group = parent.getPermissionsNoLock().getGroup().getName();
                                }
                                if (owner.isEmpty()) {
                                    Collection parent = broker.getCollection(dataRoot);
                                    owner = parent.getPermissionsNoLock().getOwner().getName();
                                }
                                visited.add(broker.openCollection(uri, Lock.LockMode.WRITE_LOCK));
                            } catch (PermissionDeniedException | TriggerException e) {
                                throw new IOException(e);
                            }
                            return FileVisitResult.CONTINUE;
                        }

                        @Override
                        public FileVisitResult visitFile(Path path, BasicFileAttributes basicFileAttributes) throws IOException {
                            try {
                                /*
                                the directory containing the file we visit, relative from the source root
                                equals, when encoded, the uri of the collection, relative from the collection root
                                 */
                                XmldbURI curi = dataRoot.append(sourcePath.relativize(path.getParent()).toString());
                                Collection targetCollection = findCollection(curi, visited);
                                XmldbURI docUri = XmldbURI.xmldbUriFor(
                                        URLEncoder.encode(path.getFileName().toString(), StandardCharsets.UTF_8), false);
                                DocumentImpl document = targetCollection.getDocument(broker, docUri);
                                if (document != null && LOG.isDebugEnabled()) {
                                    LOG.debug(describeTimestamps(path, basicFileAttributes, docUri, document));
                                }
                                if (document == null ||
                                        document.getLastModified() < basicFileAttributes.creationTime().toMillis() ||
                                        document.getLastModified() < basicFileAttributes.lastModifiedTime().toMillis()
                                ) {
                                    // only store new and newer files
                                    storeInCollection(path, docUri, targetCollection, transaction, broker);
                                    if (logInfo) {
                                        LOG.log(Level.INFO, (document == null ? "created: " : "updated: ") + curi.append(docUri));
                                    }
                                } else if (LOG.isDebugEnabled()) {
                                    // document cannot be null in this branch
                                    LOG.debug("Not updated! " + describeTimestamps(path, basicFileAttributes, docUri, document));
                                }
                            } catch (Exception e) {
                                throw new IOException(e);
                            }
                            return FileVisitResult.CONTINUE;
                        }

                        @Override
                        public FileVisitResult visitFileFailed(Path path, IOException e) {
                            /*
                            as soon as we encounter an error processing terminates
                            this method will be called by java when a directory or file cannot be read,
                            NOT when an IOException is thrown from within a method
                            NOTE(review): the walk then returns normally, so the sync is reported as success - confirm intended
                             */
                            return handleIOException(path, e);
                        }

                        @Override
                        public FileVisitResult postVisitDirectory(Path path, IOException e) throws IOException {
                            if (e != null) {
                                return handleIOException(path, e);
                            }
                            /*
                            directory processed, save and close collection
                             */
                            XmldbURI curi = dataRoot.append(sourcePath.relativize(path).toString());
                            Collection targetCollection = findCollection(curi, visited);
                            targetCollection.close();
                            return FileVisitResult.CONTINUE;
                        }
                    });

            // NOTE(review): success is set before chownGrp, so a failing chown/chgrp still commits - confirm intended
            success = true;
            LOG.log(Level.INFO, String.format("%s sync %s to %s", "success: ", sourcePath, rootCollection));
            chownGrp(visited, broker);
        } catch (Exception e) {
            LOG.log(Level.INFO, String.format("%s sync %s to %s", "failed: ", sourcePath, rootCollection));
            throw new EXistException(e);
        } finally {
            visited.forEach(Collection::close);
            if (success) {
                transaction.commit();
            } else {
                transaction.abort();
            }
            clearCache(broker);
        }
    }

    /**
     * Builds the debug message comparing timestamps of a file with the last modified of a document.
     */
    private static String describeTimestamps(Path path, BasicFileAttributes attributes, XmldbURI docUri, DocumentImpl document) {
        return String.format("file: %s, created %s, modified %s; coll: %s, modified %s",
                path.getFileName(),
                new Date(attributes.creationTime().toMillis()),
                new Date(attributes.lastModifiedTime().toMillis()),
                docUri,
                new Date(document.getLastModified()));
    }

    /**
     * Finds the collection with the given uri (compared without {@link #XMLDBPREFIX}) among the visited ones.
     *
     * @throws IOException when no visited collection has this uri
     */
    private Collection findCollection(XmldbURI curi, List<Collection> visited) throws IOException {
        final String collUri = stripXMLDBPREFIX(curi);
        return visited.stream()
                .filter(c -> c.getURI().toString().equals(collUri))
                .findFirst()
                .orElseThrow(() -> new IOException(collUri + " not found"));
    }

    /**
     * Logs the failure and tells the file walk to terminate.
     */
    private FileVisitResult handleIOException(Path path, IOException e) {
        LOG.error("Processing failed: " + path, e);
        return FileVisitResult.TERMINATE;
    }

    /**
     * Executes {@link #CLEAR_CACHE_XQ} when clearing the cache is enabled, failures are logged, not thrown.
     */
    private void clearCache(DBBroker broker) {
        if (!clearCache) {
            return;
        }
        /*
        now clear cache, because of

WARN  (EmbeddedXMLStreamReader.java [verifyOriginNodeId]:239) - Expected node id 1.6.2.2.4.5.3, got 1.4.2.6.2.2.3.1; resyncing address
WARN  (TransactionManager.java [close]:409) - Transaction was not committed or aborted, auto aborting!
ERROR (EXistServlet.java [doPost]:488) - java.lang.NullPointerException

        and because cache may contain invalid data now

         */
        try {
            XQuery xQuery = new XQuery();
            CompiledXQuery query = xQuery.compile(new XQueryContext(broker.getDatabase()), new StringSource(CLEAR_CACHE_XQ));
            xQuery.execute(broker, query, null);
        } catch (XPathException | IOException | PermissionDeniedException ex) {
            LOG.error("cache clear failed", ex);
        }
    }

    /**
     * For every visited collection executes a query that sets owner and/or group on the uris returned by
     * uri-collection for that collection. Does nothing when both owner and group are empty.
     */
    private void chownGrp(List<Collection> visited, DBBroker broker) throws XPathException, PermissionDeniedException, IOException {
        XQuery xQuery = new XQuery();
        for (Collection col : visited) {
            String q = buildChownQuery(col.getURI().toString());
            if (q != null) {
                CompiledXQuery query = xQuery.compile(new XQueryContext(broker.getDatabase()), new StringSource(q));
                xQuery.execute(broker, query, null);
            }
        }
    }

    /**
     * Fills in the query template matching the owner/group that are set.
     *
     * @return the query, or null when neither owner nor group is set
     */
    private String buildChownQuery(String uri) {
        if (owner.isEmpty()) {
            return group.isEmpty() ? null :
                    GROUP_QUERY.replace("#c#", quote(uri)).replace("#g#", quote(group));
        }
        if (group.isEmpty()) {
            return OWNER_QUERY.replace("#c#", quote(uri)).replace("#o#", quote(owner));
        }
        return OWNER_GROUP_QUERY.replace("#c#", quote(uri)).replace("#g#", quote(group)).replace("#o#", quote(owner));
    }

    /**
     * Turns a string into an xquery string literal; ampersands and double quotes are escaped so that
     * the value cannot break (out of) the literal.
     */
    private String quote(String s) {
        return '"' + s.replace("&", "&amp;").replace("\"", "\"\"") + '"';
    }

    /**
     * Returns the string form of the uri without a leading {@link #XMLDBPREFIX}.
     */
    private String stripXMLDBPREFIX(XmldbURI curi) {
        final String uri = curi.toString();
        return uri.startsWith(XMLDBPREFIX) ? uri.substring(XMLDBPREFIX.length()) : uri;
    }

    /**
     * recursively remove data from a collection when resource is not present on filesystem.
     *
     * @param coll        the collection to be checked, nothing happens when null
     * @param broker
     * @param transaction
     * @param parent      the uri of the collection to be checked, nothing happens when null
     * @throws PermissionDeniedException
     * @throws LockException
     * @throws IOException
     * @throws TriggerException
     * @throws URISyntaxException
     */
    private void removeNotInSource(Collection coll, DBBroker broker, Txn transaction, XmldbURI parent) throws
            PermissionDeniedException, LockException, IOException, TriggerException, URISyntaxException {
        if (parent == null || coll == null) return;

        // construct the baseuri, without xmldb:exist://
        final String baseUri = stripXMLDBPREFIX(dataRoot);
        for (Iterator<XmldbURI> it = coll.collectionIterator(broker); it.hasNext(); ) {
            XmldbURI xmldbURI = parent.append(it.next());
            XmldbURI uri = xmldbURI.toString().startsWith(XMLDBPREFIX) ?
                    XmldbURI.xmldbUriFor(xmldbURI.toString().substring(XMLDBPREFIX.length())) :
                    xmldbURI;
            checkRemove(uri, coll, transaction, broker, true, baseUri);
        }
        for (Iterator<DocumentImpl> it = coll.iterator(broker); it.hasNext(); ) {
            XmldbURI uri = it.next().getURI();
            checkRemove(uri, coll, transaction, broker, false, baseUri);
        }
    }

    /**
     * check whether a resource is still present in the filesystem by resolving the URL decoded
     * part of the uri below the data root in exist-db against the filesystem data root. When not
     * present the resource is removed, when present and a collection it is checked recursively.
     *
     * @param uri          the uri of the collection or document to check
     * @param collection   the collection containing the resource
     * @param transaction
     * @param broker
     * @param isCollection true when uri denotes a collection, false for a document
     * @param baseUri      the first part of the uri that must be stripped of the uri for comparison with the filesystem
     * @throws IOException
     * @throws PermissionDeniedException
     * @throws TriggerException
     * @throws LockException
     * @throws URISyntaxException
     */
    private void checkRemove(XmldbURI uri, Collection collection, Txn transaction, DBBroker broker,
                             boolean isCollection, String baseUri) throws
            IOException, PermissionDeniedException, TriggerException, LockException, URISyntaxException {
        // skip the base uri and the slash following it
        String u = uri.toString().substring(baseUri.length() + (baseUri.endsWith("/") ? 0 : 1));
        String decoded = URLDecoder.decode(u, StandardCharsets.UTF_8); // string to append to sourcePath
        Path inSource = sourcePath.resolve(decoded);
        if (!Files.exists(inSource)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("removing %s, constructed from %s", uri.toString(), inSource));
            }
            if (isCollection) {
                broker.removeCollection(transaction, broker.getCollection(uri));
                broker.saveCollection(transaction, collection);
            } else {
                broker.removeResource(transaction, collection.getDocument(broker, uri));
            }
        } else if (isCollection) {
            try (Collection coll = broker.openCollection(uri, Lock.LockMode.WRITE_LOCK)) {
                removeNotInSource(coll, broker, transaction, uri);
            }
        }
    }

    /**
     * Stores a file as a document in a collection via {@code DBBroker.storeDocument}. The mime type is
     * looked up by file name, when it cannot be determined a warning is logged and nothing is stored.
     *
     * @param fileToStore          the file in the filesystem
     * @param documentInCollection the uri of the document within the collection
     * @param collection           the collection to store the document in
     * @param transaction
     * @param broker
     * @throws Exception when storing fails
     */
    protected void storeInCollection(Path fileToStore, XmldbURI documentInCollection, Collection collection, Txn
            transaction, DBBroker broker) throws Exception {
        FileInputSource source = new FileInputSource(fileToStore);
        MimeType mimeType = MimeTable.getInstance().getContentTypeFor(fileToStore.getFileName().toString());
        if (mimeType == null) {
            LOG.log(Level.WARN, String.format("unable to determine mimetype for %s", fileToStore.getFileName()));
            return;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("storing %s in %s", documentInCollection, collection.getURI()));
        }
        broker.storeDocument(transaction, documentInCollection, source, mimeType, collection);
    }

    @Override
    public boolean afterCheckpoint() {
        return false;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy