// com.day.cq.dam.core.process.UnarchiverProcess

/*
 * Copyright 1997-2011 Day Management AG
 * Barfuesserplatz 6, 4001 Basel, Switzerland
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information of
 * Day Management AG, ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Day.
 */
package com.day.cq.dam.core.process;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.Locale;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.jackrabbit.util.Text;
import org.apache.sling.api.resource.Resource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.day.cq.commons.jcr.JcrUtil;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.AssetManager;
import com.day.cq.dam.api.DamConstants;
import com.day.cq.dam.api.Rendition;
import com.day.cq.dam.commons.process.AbstractAssetWorkflowProcess;
import com.day.cq.workflow.WorkflowException;
import com.day.cq.workflow.WorkflowSession;
import com.day.cq.workflow.exec.WorkItem;
import com.day.cq.workflow.exec.WorkflowProcess;
import com.day.cq.workflow.metadata.MetaDataMap;

/**
 * The UnarchiverProcess extracts ZIP files as they are uploaded into DAM.
 */
@Component
@Service(value = WorkflowProcess.class)
@Property(name = "process.label", value = "DAM Unarchiver Process")
public class UnarchiverProcess extends AbstractAssetWorkflowProcess {

    protected static final Logger log = LoggerFactory.getLogger(UnarchiverProcess.class);

    protected static final int BUFFER = 2048;

    protected static final String MIME_TYPE_ZIP = "application/zip";
    protected static final String FILE_EXT_ZIP = ".zip";

    public final void execute(final WorkItem item, final WorkflowSession wfSession, final MetaDataMap args)
            throws WorkflowException {

        final Session session = wfSession.getSession();
        final Asset asset = getAssetFromPayload(item, session);

        if (null != asset) {

            final String assetPath = asset.getPath();

            if (!isZipFile(asset)) {
                log.info("execute: ignoring asset [{}] as it is not a ZIP archive.", assetPath);
                return;
            }

            final Rendition original = asset.getOriginal();
            if (null != original) {

                final InputStream stream = original.getStream();
                if (null != stream) {

                    final AssetManager assetManager = getAssetManager(session);

                    if (null == assetManager) {
                        throw new WorkflowException("asset manager unavailable");
                    }

                    final UnarchiverContext context = new UnarchiverContext(session, assetManager, asset, args);

                    if (context.isDisableExtraction()) {
                        log.info("execute: skipping extraction of [{}], disabled via configuration.", assetPath);
                        return;
                    }

                    item.getWorkflowData().getMetaDataMap().put("noretry", true);

                    log.info("scan: scanning archive [{}] and verifying configured limits", assetPath);
                    scan(context);

                    log.debug("execute: calling beforeExtract for [{}]", assetPath);
                    beforeExtract(context);

                    boolean isExtractionSuccessful = false;

                    try {

                        isExtractionSuccessful = extract(context);

                    } catch (IOException e) {
                        log.error("execute: IO error while extracting archive [{}]: ", assetPath, e);

                    } catch (RepositoryException e) {
                        log.error("execute: repository error while extracting archive [{}]: ", assetPath, e);

                    } finally {
                        log.debug("execute: calling afterExtract for [{}] (extraction {})",
                                  assetPath, (isExtractionSuccessful) ? "successful" : "failed");
                        afterExtract(context, isExtractionSuccessful);
                        IOUtils.closeQuietly(stream);
                    }

                } else {
                    log.error("execute: cannot extract archive, asset [{}] in workflow [{}] "
                              + "doesn't have binary stream.", assetPath, item.getId());
                }

            } else {
                log.error("execute: cannot extract archive, asset [{}] in workflow [{}] doesn't have original file.",
                          assetPath, item.getId());
            }

        } else {
            log.error("execute: cannot extract archive, asset [{}] in workflow [{}] does not exist.",
                      item.getWorkflowData().getPayload().toString(), item.getId());
        }
    }

    private boolean extract(final UnarchiverContext context) throws RepositoryException, IOException {

        final Session session = context.getSession();
        final Asset asset = context.getAsset();
        final String assetPath = asset.getPath();

        final ZipInputStream zis = context.createZipInputStream();

        try {

            log.info("extract: begin extraction of archive [{}] - update mode [{}]",
                     assetPath, context.getUpdateMode().name());

            // create the root zip folder. if mode = "OVERWRITE", any existing one is removed
            // if update mode = "UPDATE", the existing folder is taken as the root
            // if update mode = "NEW", a new parallel uniquely named folder is created
            context.setRoot(getOrCreateRoot(context));

            try {

                ZipEntry entry;
                while (null != (entry = zis.getNextEntry())) {

                    final String name = entry.getName();

                    if (isExtractEntry(context, entry)) {

                        long numFiles = context.updateNumFiles();

                        extractEntry(context, zis, entry);

                        // intermediate save
                        if (numFiles % context.getSaveThreshold() == 0) {
                            log.debug("extract: threshold of [{}] reached, saving....", context.getSaveThreshold());
                            session.save();
                        }

                    } else {
                        log.info("extract: extraction of entry [{}] skipped for archive [{}]", name, assetPath);
                    }
                }

            } catch (Exception e) {
                log.error("extract: error while extracting archive [{}]: ", assetPath, e);

            } finally {
                zis.closeEntry();
            }


            log.info("extract: extraction of archive [{}] successfully completed.", assetPath);

            return true;

        } finally {
            IOUtils.closeQuietly(zis);
        }
    }

    private long getFolderCount(final HashMap folderMap, String zipFolder) {
        long count;
        if (folderMap.containsKey(zipFolder)) {
            count = folderMap.get(zipFolder);
            folderMap.put(zipFolder, ++count);
        } else {
            count = (long) 1;
            folderMap.put(zipFolder, count);
        }
        return count;
    }

    private String getSHA1(final MessageDigest sha1) {
        return (new BigInteger(1, sha1.digest())).toString(16);
    }

    private static boolean isZipFile(final Asset asset) {
        return MIME_TYPE_ZIP.equals(asset.getMimeType()) || asset.getName().endsWith(FILE_EXT_ZIP);
    }

    // extendable methods ---------------


    /**
     * Scans the archive ("simulated extraction") in order to verify configured limits before extraction. The scan is
     * executed as the first step. Only after a successful scan and with all limits respected the extraction will
     * continue with {@link #beforeExtract(com.day.cq.dam.core.process.UnarchiverProcess.UnarchiverContext)}. No changes
     * to the repository are made during the scan. This method also updates the {@link UnarchiverContext} with the total
     * number of bytes and files that will be extracted.
     *
     * @param context The {@link UnarchiverContext}.
     *
     * @throws com.day.cq.workflow.WorkflowException
     *          If any of the configured limits is exceeded.
     * @see com.day.cq.dam.core.process.UnarchiverProcess.UnarchiverContext#getMaxBytes()
     * @see com.day.cq.dam.core.process.UnarchiverProcess.UnarchiverContext#getMaxNumFiles()
     * @see com.day.cq.dam.core.process.UnarchiverProcess.UnarchiverContext#getMaxNumFilesPerDirectory()
     */
    protected void scan(final UnarchiverContext context) throws WorkflowException {

        final Asset asset = context.getAsset();
        final HashMap folderMap = new HashMap();
        final String assetPath = asset.getPath();

        final long maxBytes = context.getMaxBytes();
        final long maxNumFiles = context.getMaxNumFiles();
        final long maxNumFilesPerDir = context.getMaxNumFilesPerDirectory();

        log.info("scan: configured limits: max bytes [{}], max files [{}], max files per dir ["
                 + maxNumFilesPerDir + "]", maxBytes, maxNumFiles);

        final ZipInputStream zis = context.createZipInputStream();

        try {

            long numBytes = 0;
            long numFiles = 0;
            ZipEntry entry;

            while (null != (entry = zis.getNextEntry())) {

                final EntryInfo info = context.getEntryInfo(entry);
                final String zipFolder = info.getParentPath();

                if (isExtractEntry(context, entry)) {

                    long entryBytes = 0;
                    int size;
                    byte[] buffer = new byte[BUFFER];
                    while ((size = zis.read(buffer, 0, buffer.length)) != -1) {
                        entryBytes += size;
                    }

                    entry.setSize(entryBytes);
                    numBytes += entryBytes;
                    if (numBytes > maxBytes) {
                        log.error("scan: archive [{}] exceeds configured max bytes limit [{}]", assetPath, maxBytes);
                        throw new WorkflowException("Configured max number of bytes exceeded");
                    }

                    numFiles++;
                    if (numFiles > maxNumFiles) {
                        log.error("scan: archive [{}] exceeds max number of files limit [{}]", assetPath, maxNumFiles);
                        throw new WorkflowException("Configured max number of files limit exceeded");
                    }

                    final long dirFileCount = getFolderCount(folderMap, zipFolder);

                    if (dirFileCount > maxNumFilesPerDir) {
                        log.error("scan: archive [{}] exceeds max number of files/folders per directory limit ["
                                  + maxNumFilesPerDir + "] in directory [{}]", assetPath, zipFolder);
                        throw new WorkflowException("Configured total number of files limit reached");

                    }

                    log.debug("scan: scanned entry [{}] - [{}] bytes - folder:[" + zipFolder + " - " + dirFileCount
                              + " - " + maxNumFilesPerDir + "]", entry.getName(), entry.getSize());
                }
            }

            context.setTotalNumFiles(numFiles);
            context.setTotalNumBytes(numBytes);

            log.info("scan: scan of archive [{}] completed. archive is within configured limits.", assetPath);
            log.info("scan: archive [{}] will result in [{}] extracted files with a total of [" + numBytes + "] bytes.",
                     assetPath, numFiles);

        } catch (IOException e) {
            log.error("scan: IO error while scanning archive [{}]: ", assetPath, e);

        } finally {
            IOUtils.closeQuietly(zis);
        }
    }

    /**
     * This method is called for every entry in the zip archive during the actual extraction run. The method creates
     * files and folders in the repository based on the given {@link ZipEntry} and respecting the {@link
     * UnarchiverContext.UPDATE_MODE}s.
     *
     * @param context The {@link UnarchiverContext}
     * @param zis     The {@link ZipInputStream}. If you override this method, make sure you don't accidentally close
     *                the stream.
     * @param entry   The {@link ZipEntry} to extract.
     *
     * @throws Exception If any exception occurs during extraction.
     */
    protected void extractEntry(final UnarchiverContext context, final ZipInputStream zis, final ZipEntry entry)
            throws Exception {

        final Session session = context.getSession();
        final Node root = context.getTargetRoot();

        final EntryInfo info = context.getEntryInfo(entry);
        final String name = entry.getName();
        final String zipPath = info.getPath();
        final String fileName = info.getFileName();
        final String fullPath = info.getTargetPath(root.getPath());

        log.debug("extractEntry: extracting entry [{}] in archive [{}]", entry.getName(), context.getAsset().getPath());

        if (entry.isDirectory()) {

            if (!root.hasNode(zipPath)) {
                final Node node = JcrUtil.createPath(root, zipPath, false, "sling:OrderedFolder", "sling:OrderedFolder",
                                                     session, false);

                log.info("extractEntry: created directory [{}] from entry [{}]", node.getPath(), name);

            } else {
                log.debug("extractEntry: directory [{}] already exists, skipping entry [{}]", fullPath, name);
            }

        } else {

            final File tempFile = File.createTempFile("unarchiver-file-" + System.currentTimeMillis(), null);
            log.debug("extractEntry: created temporary file at [{}]", tempFile.getPath());
            FileOutputStream fos = null;
            FileInputStream fis = null;

            try {

                int size;
                long numBytes = 0;
                byte[] buffer = new byte[BUFFER];
                final MessageDigest sha1 = MessageDigest.getInstance("SHA1");

                fos = new FileOutputStream(tempFile);

                while ((size = zis.read(buffer, 0, buffer.length)) != -1) {
                    fos.write(buffer, 0, size);
                    sha1.update(buffer, 0, size);
                    numBytes += size;
                }

                IOUtils.closeQuietly(fos);

                context.updateNumBytes(numBytes);
                if (!isMatchSkipFileNamePatterns(context, fileName)) {
                    
                    final String zipSha1 = getSHA1(sha1);
                    log.debug("extractEntry: got SHA-1 [{}] for entry [{}]", zipSha1, name);
    
                    final Resource resource = getResourceResolver(session).getResource(fullPath);
                    Asset target = null;
                    // this case only applies in update mode = "UPDATE"
                    if (null != resource && null != (target = resource.adaptTo(Asset.class))) {
                        final String assetSha1 = target.getMetadataValue(DamConstants.PN_SHA1);
    
                        // original is identical to the zip entry file, skip
                        if (StringUtils.equals(assetSha1, zipSha1)) {
                            log.info("extractEntry: entry [{}] exists as asset [{}] with identical SHA-1, skipping.",
                                     name, target.getPath());
                            return;
                        }
                    }
    
                    fis = new FileInputStream(tempFile);
                    final String mimeType = mimeTypeService.getMimeType(fileName);
    
                    final Asset newAsset;
    
                    // SHA-1 is not the same, update original. this case only applies in update mode = UPDATE
                    if (null != target) {
                        newAsset = target.addRendition("original", fis, mimeType).getAsset();
                        log.info("extractEntry: updated existing asset [{}] from entry [{}]", newAsset.getPath(), name);
    
                    } else if (null != resource) {
                        log.error("extractEntry: cannot extract entry [{}], blocking resource at [{}]",
                                  name, resource.getPath());
                        throw new WorkflowException("Cannot extract entry, blocking resource.");
    
                    } else {
                        newAsset = context.getAssetManager().createAsset(fullPath, fis, mimeType, false);
                        if (null != newAsset) {
                            log.info("extractEntry: created new asset [{}] from entry [{}]", newAsset.getPath(), name);
                        } else {
                            log.error("extractEntry: asset manager couldn't create asset for entry [{}]", name);
                            throw new WorkflowException("Asset manager couldn't create asset for entry " + name);
                        }
                    }
                }
            } finally {
                IOUtils.closeQuietly(fos);
                IOUtils.closeQuietly(fis);
                tempFile.delete();
                log.debug("extractEntry: deleted temporary file at [{}]", tempFile.getPath());
            }
        }
    }
    
    private boolean isMatchSkipFileNamePatterns(UnarchiverContext context,
            String fileName) {
        String patterns[] = context.getSkipFileNamePatterns().split(":");
        for (String pattern : patterns) {
            if (fileName.matches(pattern)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Executed after extraction, regardless of the extraction being successful or having failed. The default
     * implementation removes the original archive if {@link UnarchiverProcess.UnarchiverContext#isRemoveOriginal()}
     * returns true and the extraction was successful indicated by isExtractionSuccessful.
     *
     * @param context                The {@link com.day.cq.dam.core.process.UnarchiverProcess.UnarchiverContext}.
     * @param isExtractionSuccessful Indicates whether the extract was successful.
     */
    protected void afterExtract(final UnarchiverContext context, final boolean isExtractionSuccessful) {

        final String assetPath = context.getAsset().getPath();

        if (!isExtractionSuccessful) {
            log.info("afterExtract: not removing original archive [{}] as preceding extraction failed.", assetPath);
            return;
        }

        if (context.isRemoveOriginal()) {

            final Session session = context.getSession();

            log.debug("afterExtract: removing original archive [{}]", assetPath);

            try {
                session.removeItem(assetPath);

                log.info("afterExtract: original archive [{}] removed", assetPath);

            } catch (RepositoryException e) {
                log.error("afterExtract: could not remove asset [{}]: ", assetPath, e);
            }
        } else {
            log.info("afterExtract: not removing original archive [{}] as per configuration.", assetPath);
        }
    }

    /**
     * Executed for every entry in the archive during {@link #scan(UnarchiverContext)} and {@link
     * #extract(UnarchiverContext)}. This method can be overridden to determine whether a particular entry shall be
     * scanned/extracted or not. Default implementation returns true always.
     *
     * @param context The {@link UnarchiverContext}.
     * @param entry   The {@link ZipEntry} to be scanned/extracted.
     *
     * @return true if this entry shall be scanned/extracted, false otherwise.
     */
    @SuppressWarnings("unused")
    protected boolean isExtractEntry(final UnarchiverContext context, final ZipEntry entry) {
        return true;
    }

    /**
     * Executed after verifying that the asset has content and is an archive. The default implementation is NOOP.
     *
     * @param context The {@link UnarchiverContext}.
     */
    @SuppressWarnings("unused")
    protected void beforeExtract(final UnarchiverContext context) {
        // noop
    }

    /**
     * This method serves for retrieving and/or creating the root node serving as the extraction target folder.
     *
     * @param context The {@link UnarchiverContext}
     *
     * @return The {@link Node} into which the archive is extracted.
     *
     * @throws RepositoryException If node retrieval or creation fails.
     */
    protected Node getOrCreateRoot(final UnarchiverContext context) throws RepositoryException {

        final Session session = context.getSession();
        final Asset asset = context.getAsset();
        final String assetPath = asset.getPath();
        final String rootHint = Text.getRelativeParent(asset.getPath(), 1) + "/"
                                + StringUtils.substringBeforeLast(asset.getName(), ".");
        final Node root;

        if (UnarchiverContext.UPDATE_MODE.OVERWRITE == context.getUpdateMode()) {

            if (session.itemExists(rootHint)) {
                log.debug("extract: update mode is [{}], removing existing folder [{}]...",
                          context.getUpdateMode().name(), rootHint);
                session.getItem(rootHint).remove();
                log.info("extract: target folder [{}] removed.", rootHint);
            }

            root = JcrUtil.createPath(rootHint, "sling:OrderedFolder", session);
            log.debug("extract: created extraction folder at [{}] for [{}]", root.getPath(), assetPath);

        } else if (UnarchiverContext.UPDATE_MODE.UPDATE == context.getUpdateMode()) {

            if (session.itemExists(rootHint)) {
                root = (Node) session.getItem(rootHint);
                log.debug("extract: update mode is [{}], using existing folder [{}]...",
                          context.getUpdateMode().name(), rootHint);
            } else {
                root = JcrUtil.createPath(rootHint, "sling:OrderedFolder", session);
                log.debug("extract: update mode is [{}], but destination doesn't exit, created [{}]",
                          context.getUpdateMode().name(), rootHint);
            }

        } else {
            log.debug("extract: update mode is [{}], creating unique folder...", context.getUpdateMode().name());
            root = JcrUtil.createUniquePath(rootHint, "sling:OrderedFolder", session);
            log.debug("extract: created extraction folder at [{}] for [{}]", root.getPath(), assetPath);
        }

        return root;
    }


    /**
     * The UnarchiverContext describes the support provided by the {@link UnarchiverProcess} for the
     * extraction process.
     */
    protected static class UnarchiverContext {

        protected static final String ARG_NAME_DISABLE_EXTRACT = "disableExtract";
        protected static final String ARG_NAME_REMOVE_ORIGINAL = "removeOriginal";
        protected static final String ARG_NAME_MAX_BYTES = "maxBytes";
        protected static final String ARG_NAME_MAX_NUM_ITEMS = "maxNumItems";
        protected static final String ARG_NAME_MAX_NUM_ITEMS_PER_DIR = "maxNumItemsPerDir";
        protected static final String ARG_NAME_SAVE_THRESHOLD = "saveThreshold";
        protected static final String ARG_NAME_UPDATE_MODE = "updateMode";
        protected static final String ARG_NAME_SKIP_FILE_NAME_PATTERNS = "skipFileNamePatterns";

        protected static final long DEFAULT_MAX_BYTES = 104857600;
        protected static final long DEFAULT_MAX_NUM_ITEMS = 10000;
        protected static final long DEFAULT_MAX_NUM_ITEMS_PER_DIR = 100;
        protected static final long DEFAULT_SAVE_THRESHOLD = 1024;
        //private static final String DEFAULT_SKIP_FILE_NAME_PATTERNS = "^\\.(.*)*:Acr[0-9]*.tmp";

        protected enum UPDATE_MODE {
            OVERWRITE,
            UPDATE,
            NEW
        }

        private final boolean removeOriginal;
        private final boolean disableExtraction;

        private final long maxBytes;
        private final long maxNumFiles;
        private final long maxNumFilesPerDir;
        private final long saveThreshold;

        private final Session session;
        private final AssetManager assetManager;
        private final Asset asset;

        private Node root;
        private UPDATE_MODE updateMode;

        private long numFiles = 0;
        private long numBytes = 0;
        private long totalNumFiles;
        private long totalNumBytes;
        private String skipFileNamePatterns = "";

        private UnarchiverContext(final Session session, final AssetManager assetManager, final Asset asset,
                                  final MetaDataMap args) {
            this.session = session;
            this.assetManager = assetManager;
            this.asset = asset;
            removeOriginal = args.get(ARG_NAME_REMOVE_ORIGINAL, false);
            disableExtraction = args.get(ARG_NAME_DISABLE_EXTRACT, false);
            maxBytes = args.get(ARG_NAME_MAX_BYTES, DEFAULT_MAX_BYTES);
            maxNumFiles = args.get(ARG_NAME_MAX_NUM_ITEMS, DEFAULT_MAX_NUM_ITEMS);
            maxNumFilesPerDir = args.get(ARG_NAME_MAX_NUM_ITEMS_PER_DIR, DEFAULT_MAX_NUM_ITEMS_PER_DIR);
            saveThreshold = args.get(ARG_NAME_SAVE_THRESHOLD, DEFAULT_SAVE_THRESHOLD);
            skipFileNamePatterns = args.get(ARG_NAME_SKIP_FILE_NAME_PATTERNS, "");
            try {
                final String updateModeStr = args.get(ARG_NAME_UPDATE_MODE, "new").toUpperCase();
                updateMode = UPDATE_MODE.valueOf(updateModeStr);
            } catch (IllegalArgumentException e) {
                updateMode = UPDATE_MODE.NEW;
            }

        }

        /**
         * Return the {@link Asset} representing the original archive to be extracted.
         *
         * @return The {@link Asset}.
         */
        public Asset getAsset() {
            return asset;
        }

        /**
         * Returns the {@link AssetManager} for DAM operations.
         *
         * @return The {@link AssetManager}
         */
        public AssetManager getAssetManager() {
            return assetManager;
        }

        /**
         * Return context specific information about a {@link ZipEntry}.
         *
         * @param entry The entry for which to return context information.
         *
         * @return A {@link EntryInfo} for the given zip entry.
         */
        public EntryInfo getEntryInfo(final ZipEntry entry) {
            return new EntryInfo(entry);
        }

        /**
         * The number of items added to the repository session before {@link javax.jcr.Session#save()} is called.
         *
         * @return The save threshold.
         */
        public long getSaveThreshold() {
            return saveThreshold;
        }

        /**
         * Get the extraction target root/directory.
         *
         * @return The {@link Node} representing the extraction target.
         */
        public Node getTargetRoot() {
            return root;
        }

        /**
         * Return the current number of extracted files/directories during an extraction run. This info is only valid
         * and updated during {@link UnarchiverProcess#extract(UnarchiverContext)}. The info is updated before the
         * actual extraction of the entry is persisted to the repository.
         *
         * @return The current number of already extracted files/directories.
         */
        @SuppressWarnings("unused")
        public long getNumFiles() {
            return numFiles;
        }

        /**
         * Return the current number of bytes extracted during an extraction run. This info is only valid and updated
         * during {@link UnarchiverProcess#extract(UnarchiverContext)}. The info is updated before the actual extraction
         * of the entry is persisted to the repository.
         *
         * @return The current number of bytes extracted.
         */
        @SuppressWarnings("unused")
        public long getNumBytes() {
            return numBytes;
        }

        /**
         * The {@link Session} this process operates with.
         *
         * @return The {@link Session}
         */
        public Session getSession() {
            return session;
        }

        /**
         * Returns the maximum number of bytes allowed to be extracted via the Unarchiver workflow step configuration.
         *
         * @return a long representing the maximum number of bytes. Default: 104857600 (100 Mb).
         */
        protected long getMaxBytes() {
            return maxBytes;
        }

        /**
         * Returns the maximum number of files allowed to be extracted via the Unarchiver workflow step configuration.
         *
         * @return a long representing the maximum number of files. Default: 10000.
         */
        protected long getMaxNumFiles() {
            return maxNumFiles;
        }

        /**
         * Indicates the maximum number of files allowed as direct children of a folder.
         *
         * @return a long representing the maximum number of child files of a folder. Default: 1000.
         */
        public long getMaxNumFilesPerDirectory() {
            return maxNumFilesPerDir;
        }

        /**
         * The total number of files that will be extracted. This information is collected by the {@link
         * UnarchiverProcess#scan(com.day.cq.dam.core.process.UnarchiverProcess.UnarchiverContext)} method.
         *
         * @return The total number of files that will be extracted.
         */
        @SuppressWarnings("unused")
        public long getTotalNumFiles() {
            return totalNumFiles;
        }

        /**
         * The total number of bytes that will be extracted. This information is collected by the {@link
         * UnarchiverProcess#scan(com.day.cq.dam.core.process.UnarchiverProcess.UnarchiverContext)} method.
         *
         * @return The total number of bytes that will be extracted.
         */
        @SuppressWarnings("unused")
        public long getTotalNumBytes() {
            return totalNumBytes;
        }

        /**
         * Indicates the mode with which existing folders are handled during archive extraction.
         *
         * @return The {@link UnarchiverContext.UPDATE_MODE} as per configuration.
         */
        public UPDATE_MODE getUpdateMode() {
            return updateMode;
        }

        /**
         * Indicates whether extraction is disabled as a whole via the Unarchiver workflow step configuration.
         *
         * @return true if extraction is disabled. Default: false.
         */
        protected boolean isDisableExtraction() {
            return disableExtraction;
        }

        /**
         * Indicates whether the original archive (DAM Asset) shall be removed after successful extraction.
         *
         * @return true if the original is to be removed. Default: false.
         */
        protected boolean isRemoveOriginal() {
            return removeOriginal;
        }

        /**
         * Creates a new {@link ZipInputStream} from the {@link Asset}'s original rendition.
         *
         * @return Always a newly created stream.
         */
        protected ZipInputStream createZipInputStream() {
            return new ZipInputStream(new BufferedInputStream(asset.getOriginal().getStream()), Charset.forName("Cp437"));
        }

        private void setRoot(final Node root) {
            this.root = root;
        }

        private long updateNumBytes(final long numBytes) {
            this.numBytes += numBytes;
            return this.numBytes;
        }

        private long updateNumFiles() {
            this.numFiles++;
            return this.numFiles;
        }

        private void setTotalNumFiles(final long totalNumFiles) {
            this.totalNumFiles = totalNumFiles;
        }

        private void setTotalNumBytes(final long totalNumBytes) {
            this.totalNumBytes = totalNumBytes;
        }

        /**
         * returns the skip file pattern
         * @return skip file pattern
         */
        protected String getSkipFileNamePatterns() {
            return skipFileNamePatterns;
        }
    }

    /**
     * Provices information about a {@link ZipEntry} as needed by the {@link UnarchiverContext} and {@link
     * UnarchiverProcess}.
     */
    protected static class EntryInfo {

        private static final String ROOT_FOLDER = "./";

        private final ZipEntry entry;
        private final String path;
        private final String fileName;
        private final String parentPath;

        private EntryInfo(final ZipEntry entry) {
            this.entry = entry;
            final String name = entry.getName();
            path = name.endsWith("/") ? StringUtils.substring(name, 0, name.length() - 1) : name;
            fileName = Text.getName(entry.getName());
            parentPath = Text.getRelativeParent(path, 1);
        }

        /**
         * Get the original {@link ZipEntry}.
         *
         * @return The zip entry.
         */
        @SuppressWarnings("unused")
        protected ZipEntry getEntry() {
            return entry;
        }

        /**
         * Get the entry's file name. E.g. the zip entry test/folder/example.xls would return the filename
         * example.xls.
         *
         * @return A string representing the file name.
         */
        protected String getFileName() {
            return fileName;
        }

        /**
         * Return the parent path within the archive for the given zip entry. The parent path for the entry
         * test/folder/example.xls would return test/folder.
         *
         * @return A string representing the parent path.
         */
        protected String getParentPath() {
            return StringUtils.isBlank(parentPath) ? ROOT_FOLDER : parentPath;
        }

        /**
         * Returns the full path within the archive of the given zip entry, e.g. test/folder/example.xls.
         *
         * @return A string representing the path.
         */
        protected String getPath() {
            return path;
        }

        /**
         * Returns the target repository path for the given zip entry, depending on the given root path. The name part
         * of the path will have any illegal characters escaped via {@link JcrUtil#createValidName(String)}.
         *
         * @param rootPath The root extraction target path for the archive.
         *
         * @return A string representing the repository path.
         */
        protected String getTargetPath(final String rootPath) {
            return rootPath + "/"
                   + (ROOT_FOLDER.equals(getParentPath()) ? "" : getParentPath() + "/")
                   +  Text.escapeIllegalJcrChars(getFileName());
        }
    }
}