com.day.cq.dam.core.process.ExifToolExtractMetadataProcess Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of uber-jar Show documentation
There is a newer version: 6.5.21
/*
 * Copyright 1997-2008 Day Management AG
 * Barfuesserplatz 6, 4001 Basel, Switzerland
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information of
 * Day Management AG, ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Day.
 */
package com.day.cq.dam.core.process;

import static com.day.cq.commons.jcr.JcrConstants.JCR_CONTENT;
import static com.day.cq.dam.api.DamConstants.DC_FORMAT;
import static com.day.cq.dam.api.DamConstants.METADATA_FOLDER;
import static com.day.cq.dam.api.DamConstants.PN_EXTRACTED;
import static com.day.cq.dam.api.DamConstants.PN_SHA1;
import static com.day.cq.dam.api.DamConstants.DAM_SIZE;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import javax.jcr.Node;
import javax.jcr.Session;
import javax.jcr.RepositoryException;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteStreamHandler;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ValueMap;
import org.osgi.service.component.ComponentContext;
import org.apache.sling.commons.osgi.OsgiUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.adobe.xmp.XMPConst;
import com.adobe.xmp.XMPMeta;
import com.adobe.xmp.XMPMetaFactory;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.Rendition;
import com.day.cq.dam.api.handler.AssetHandler;
import com.day.cq.dam.api.metadata.ExtractedMetadata;
import com.day.cq.dam.commons.metadata.SimpleXmpToJcrMetadataBuilder;
import com.day.cq.dam.commons.process.AbstractAssetWorkflowProcess;
import com.day.cq.tagging.JcrTagManagerFactory;
import com.day.cq.tagging.Tag;
import com.day.cq.tagging.TagManager;
import com.day.cq.workflow.WorkflowException;
import com.day.cq.workflow.WorkflowSession;
import com.day.cq.workflow.exec.WorkItem;
import com.day.cq.workflow.metadata.MetaDataMap;

/**
 * The ExifToolExtractMetadataProcess is called in a workflow process step.
 * This process imports metadata if the payload of the step is an
 * {@link com.day.cq.dam.api.Asset Asset} or a part of an
 * {@link com.day.cq.dam.api.Asset Asset}.
 * 
 * @see AbstractAssetWorkflowProcess
 */
@Component(metatype = true)
@Service
@Property(name = "process.label", value = "Exif ToolExtract Meta Data")
public class ExifToolExtractMetadataProcess extends AbstractAssetWorkflowProcess {
    /**
     * Kinds of arguments understood by this process step. Each constant
     * carries a short argument name; an argument string handed to the step
     * is that name followed by a colon and the value (for example
     * {@code mime:image/jpeg}).
     */
    public enum Arguments {
        /** Legacy single-string form holding all arguments, comma separated. */
        PROCESS_ARGS("PROCESS_ARGS"),

        /** Argument for the mime types to use */
        MIME_TYPES("mime"),

        /** Argument for thumb nail sizes */
        THUMBNAILS("tn"),

        /** Argument for command-line to execute */
        COMMANDS("cmd");

        /** Short name of the argument; never changes after construction. */
        private final String argumentName;

        Arguments(String argumentName) {
            this.argumentName = argumentName;
        }

        /**
         * @return the short argument name, e.g. {@code "mime"}
         */
        public String getArgumentName() {
            return this.argumentName;
        }

        /**
         * @return the argument name followed by a colon, e.g. {@code "mime:"}
         */
        public String getArgumentPrefix() {
            return this.argumentName + ":";
        }

    }

    // Relative path of the binary property; checked on the original
    // rendition's node in execute() to read the size of the asset.
    private static final String JCR_CONTENT_JCR_DATA = "jcr:content/jcr:data";

    // Metadata property holding the hierarchical subjects that
    // extractHierarchicalSubjects() tries to resolve to tags.
    private static final String METADATA_PROPERTY_NAME_ADOBE_KEYWORDS = "lr:hierarchicalSubject";

    // Boolean property set on the asset's jcr:content node in execute() when
    // it is not present yet (per the comment there: to stop XMP writeback).
    private static final String SYNC_FLAG = "newRendition";

    // Used to obtain a TagManager for the workflow session.
    @Reference
    private JcrTagManagerFactory tagManagerFactory = null;

    // Used by saveMetadata() to store extracted metadata and XMP on the
    // asset's metadata node.
    @Reference
    private SimpleXmpToJcrMetadataBuilder metadataMetadataBuilderXmpTo;

    /**
     * Logger instance for this class.
     */
    private static final Logger log = LoggerFactory
            .getLogger(ExifToolExtractMetadataProcess.class);

    // dc:format values that isDefaultFormat() reports as "default".
    private String defaultFormats[] = new String[] { "application/octet-stream" };

    // Whether execute() computes and stores a SHA-1 digest of the original
    // rendition; assigned from the component configuration on activation.
    private boolean sha1Enabled = false;

    // Name of the configuration property that feeds sha1Enabled.
    @Property(boolValue = true)
    public static final String ENABLE_SHA1_GEN = "cq.dam.enable.sha1";

    /**
     * Extracts metadata for the asset in the workflow payload by running the
     * configured command lines against a temporary copy of the original
     * rendition and storing every {@code name: value} line of their output
     * as a metadata property.
     * <p>
     * Steps: resolve the asset and its handler, flag the asset content with
     * {@link #SYNC_FLAG}, check the asset mime type against the accepted
     * {@code mime:} arguments (the step ends silently if it is not accepted),
     * run each {@code cmd:} argument, then always record the extraction
     * time, the optional SHA-1 digest, the binary size and the mime type
     * before saving the metadata and mapping hierarchical subjects to tags.
     * <p>
     * Any exception, including the {@link WorkflowException} raised for a
     * missing asset, is logged as a warning and not propagated.
     *
     * @param workItem        work item whose payload identifies the asset
     * @param workflowSession workflow session providing the JCR session
     * @param args            process arguments, see {@link #buildArguments(MetaDataMap)}
     * @throws WorkflowException declared by the process contract
     */
    public void execute(WorkItem workItem, WorkflowSession workflowSession,
            MetaDataMap args) throws WorkflowException {
        final String[] arguments = buildArguments(args);
        try {
            final Session session = workflowSession.getSession();
            final Asset asset = getAssetFromPayload(workItem, session);
            if (null == asset) {
                String wfPayload = workItem.getWorkflowData().getPayload()
                        .toString();
                String message = "execute: cannot extract metadata, asset [{"
                        + wfPayload
                        + "}] in payload doesn't exist for workflow [{"
                        + workItem.getId() + "}].";
                // ends up in the catch below and is only logged
                throw new WorkflowException(message);
            }

            asset.setBatchMode(true);
            final AssetHandler handler = getAssetHandler(asset.getMimeType());
            if (null == handler) {
                log.error(
                        "execute: cannot extract metadata, no handler found for asset [{}] with mime type [{}]",
                        asset.getPath(), asset.getMimeType());
                return;
            }

            // Setting an extracted property to stop XMP Writeback process
            final Node assetNode = asset.adaptTo(Node.class);
            final Node content = assetNode.getNode(JCR_CONTENT);
            if (!content.hasProperty(SYNC_FLAG)) {
                content.setProperty(SYNC_FLAG, true);
            }

            final ExtractedMetadata metadata = new ExtractedMetadata();
            File tmpDir = null;
            try {
                // Process only specific mime types, based on arguments
                final List<String> mimeTypes = collectAcceptedMimeTypes(arguments, asset.getPath());
                final String assetMimeType = asset.getMimeType();
                if (!mimeTypes.contains(assetMimeType)) {
                    log.info("execute: mime type [{}] of asset [{}] is not in list of accepted mime types [" + mimeTypes
                            + "], ignoring.", assetMimeType, asset.getPath());
                    return;
                }

                // createTempFile only reserves a unique name; swap the file for a directory
                tmpDir = File.createTempFile("cqdam", null);
                tmpDir.delete();
                tmpDir.mkdir();

                final File tmpFile = stageOriginal(asset, tmpDir);
                runExtractionCommands(arguments, asset, tmpDir, tmpFile, metadata);
            } catch (Exception e) {
                // best effort: the always-set properties below are still written
                log.error("execute: failed to prepare metadata extraction for asset [{}]: ", asset.getPath(), e);
            } finally {
                deleteTempDirectory(tmpDir);
            }

            // set these properties always
            metadata.setMetaDataProperty(PN_EXTRACTED, Calendar
                    .getInstance().getTime());

            final Rendition original = asset.getOriginal();
            if (sha1Enabled) {
                final InputStream in = original.getStream();
                try {
                    final String sha1 = DigestUtils.shaHex(in);
                    metadata.setMetaDataProperty(PN_SHA1, sha1);
                } finally {
                    IOUtils.closeQuietly(in);
                }
            }

            // add size of the asset as property
            final Node originalBinary = original.adaptTo(Node.class);
            if (originalBinary.hasProperty(JCR_CONTENT_JCR_DATA)) {
                metadata.setMetaDataProperty(DAM_SIZE, originalBinary.getProperty(JCR_CONTENT_JCR_DATA).getBinary().getSize());
            }

            // recheck mime type respectively dc:format. it might happen
            // that the extracted xmp data contains a wrong dc:format value.
            // the correct dc:format value is required for the processing.
            resetMimetype(asset, metadata);
            saveMetadata(asset, metadata);
            extractHierarchicalSubjects(asset, session);
        } catch (Exception e) {
            log.warn(
                    "unexpected error occurred during metadata extraction. Cause: {}",
                    e.getMessage(), e);
        }
    }

    /** Collects the trimmed values of all {@code mime:} arguments, in order. */
    private static List<String> collectAcceptedMimeTypes(final String[] arguments, final String assetPath) {
        final String prefix = Arguments.MIME_TYPES.getArgumentPrefix();
        final List<String> mimeTypes = new LinkedList<String>();
        for (String str : arguments) {
            if (str.startsWith(prefix)) {
                final String mt = str.substring(prefix.length()).trim();
                log.debug("execute: accepted mime type [{}] for asset [{}].", mt, assetPath);
                mimeTypes.add(mt);
            }
        }
        return mimeTypes;
    }

    /** Copies the asset's original rendition into a file named after the asset inside {@code directory}. */
    private static File stageOriginal(final Asset asset, final File directory) throws java.io.IOException {
        final Rendition original = asset.getOriginal();
        final File target = new File(directory, asset.getName());
        final OutputStream fos = new FileOutputStream(target);
        InputStream in = null;
        try {
            in = original.getStream();
            IOUtils.copy(in, fos);
        } finally {
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(fos);
        }
        return target;
    }

    /**
     * Runs every {@code cmd:} argument in {@code tmpDir} and feeds the captured
     * output into {@code metadata}; the first failing command is logged and
     * stops the remaining ones.
     */
    private static void runExtractionCommands(final String[] arguments, final Asset asset, final File tmpDir,
            final File tmpFile, final ExtractedMetadata metadata) {
        // substitution values handed to CommandLine.parse
        final Map<String, String> parameters = new HashMap<String, String>();
        parameters.put("filename", tmpFile.getName());
        parameters.put("file", tmpFile.getAbsolutePath());
        parameters.put("directory", tmpDir.getAbsolutePath());
        parameters.put("basename", tmpFile.getName().replaceFirst("\\..*$", ""));
        parameters.put("extension", tmpFile.getName().replaceFirst("^.*\\.", ""));

        final String prefix = Arguments.COMMANDS.getArgumentPrefix();
        String lastLine = "";
        try {
            for (String argument : arguments) {
                if (!argument.startsWith(prefix)) {
                    continue;
                }
                final String cmd = argument.substring(prefix.length()).trim();
                final CommandLine commandLine = CommandLine.parse(cmd, parameters);
                lastLine = commandLine.toString();

                final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
                final DefaultExecutor exec = new DefaultExecutor();
                exec.setWorkingDirectory(tmpDir);
                exec.setStreamHandler(new PumpStreamHandler(outputStream));

                log.info("execute: executing command line [{}] for asset [{}].", lastLine, asset.getPath());
                // No need to check the exit value, we get
                // an Exception from the executor if it's not 0
                exec.execute(commandLine);

                applyCommandOutput(outputStream.toString(), metadata);
            }
        } catch (Exception e) {
            log.error("execute: failed to execute command [{}] for asset [" + asset.getPath() + "]: ",
                    lastLine, e);
        }
    }

    /**
     * Stores each {@code name: value} line of {@code output} as a metadata
     * property. Only the first colon separates name from value, so values
     * that contain colons themselves (timestamps, for instance) are kept
     * whole; lines without a name are skipped instead of failing.
     */
    private static void applyCommandOutput(final String output, final ExtractedMetadata metadata) {
        for (String line : output.split("\\r?\\n")) {
            final int separator = line.indexOf(':');
            final String name = separator < 0 ? "" : line.substring(0, separator).trim();
            if (name.length() == 0) {
                if (line.trim().length() > 0) {
                    log.debug("execute: skipping command output line without a property name [{}].", line);
                }
                continue;
            }
            metadata.setMetaDataProperty(name, line.substring(separator + 1).trim());
        }
    }

    /**
     * Deletes {@code file} and, for a directory, everything below it; a
     * {@code null} argument is ignored. NOTE(review): directories are
     * descended into without a symlink check - fine for the files this step
     * creates itself, confirm if configured commands may create links.
     */
    private static void deleteTempDirectory(final File file) {
        if (file == null) {
            return;
        }
        final File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                deleteTempDirectory(child);
            }
        }
        if (!file.delete() && file.exists()) {
            log.warn("execute: could not delete temporary file [{}].", file.getAbsolutePath());
        }
    }

    /**
     * Stores the extracted metadata on the asset's metadata node
     * ({@code jcr:content/metadata}). If the metadata also carries an XMP
     * stream, that stream is parsed and stored as well and is closed
     * afterwards. Failures are logged, never thrown.
     *
     * @param asset
     *            asset whose metadata node receives the values
     * @param metadata
     *            extracted metadata to persist
     */
    protected void saveMetadata(final Asset asset,
            final ExtractedMetadata metadata) {
        final Resource resource = asset.adaptTo(Resource.class);
        final ResourceResolver resolver = resource.getResourceResolver();
        final Resource metadataResource = resolver.getResource(resource,
                JCR_CONTENT + "/" + METADATA_FOLDER);
        if (null == metadataResource) {
            log.error(
                    "execute: cannot save metdata for asset [{}], doesn't have metdata node.",
                    asset.getPath());
            return;
        }

        final Node metadataNode = metadataResource.adaptTo(Node.class);
        InputStream is = null;
        try {
            metadataMetadataBuilderXmpTo.storeAsXmp(metadata, metadataNode,
                    false);

            // store additional xmp data available as xml stream
            is = metadata.getXmp();
            if (is != null) {
                final XMPMeta meta = XMPMetaFactory.parse(is);
                /*
                 * If dc:format is already set, don't override it (required
                 * for the bugs 41829 & 42040). If the stored dc:format is a
                 * default format such as "application/octet-stream" and
                 * there is a dc:format in the XMP, the XMP value should
                 * override it. Skipping default formats is required when
                 * files are placed through Adobe Drive/WebDAV: the
                 * AssetIOHandler uses
                 * org.apache.jackrabbit.server.io.ImportContext.getMimeType(),
                 * which returns application/octet-stream if the file format
                 * is not familiar/null. This gives the following priority
                 * for obtaining dc:format: file extension > non-xmp
                 * metadata > xmp metadata.
                 */
                if (metadataNode.hasProperty(DC_FORMAT)
                        && !isDefaultFormat(metadataNode
                                .getProperty(DC_FORMAT).getValue()
                                .getString())
                        && meta.getPropertyString(XMPConst.NS_DC, DC_FORMAT) != null) {
                    meta.deleteProperty(XMPConst.NS_DC, DC_FORMAT);
                }
                metadataMetadataBuilderXmpTo.storeXmp(metadataNode, meta,
                        false);
            }
        } catch (Exception e) {
            log.error(
                    "saveMetadata: error while saving metdata for asset [{}]: ",
                    asset.getPath(), e);
        } finally {
            // the XMP stream was previously left open; closeQuietly accepts null
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * Checks whether the given format equals one of the entries of
     * {@code defaultFormats}.
     *
     * @param format
     *            format (mime type) to look up
     * @return {@code true} if the format is listed as a default format
     */
    private boolean isDefaultFormat(String format) {
        for (final String candidate : defaultFormats) {
            if (candidate.equals(format)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Resets the mimetype: looks up the mime type for the asset's name and,
     * if one is found, stores it as {@code dc:format} in the metadata. The
     * lookup is done once and its result reused.
     *
     * @param asset
     *            asset to read the mimetype
     * @param metadata
     *            metadata to set the mimetype
     */
    private void resetMimetype(Asset asset, ExtractedMetadata metadata) {
        final String mimeType = mimeTypeService.getMimeType(asset.getName());
        if (mimeType != null) {
            metadata.setMetaDataProperty(DC_FORMAT, mimeType);
        }
    }

    /**
     * This method extracts hierarchical subjects from the respective XMP
     * property ({@link #METADATA_PROPERTY_NAME_ADOBE_KEYWORDS}) and attempts to
     * map them to CQ tags. If tags are found, the asset's metadata node is
     * tagged as such. Subjects that do not resolve to a tag are logged and
     * skipped.
     *
     * @param asset
     *            The {@link Asset}
     * @param session
     *            The session.
     */
    private void extractHierarchicalSubjects(final Asset asset,
            final Session session) {

        final Resource resource = asset.adaptTo(Resource.class);
        final ResourceResolver resolver = resource.getResourceResolver();
        final Resource metadataResource = resolver.getResource(resource,
                JCR_CONTENT + "/" + METADATA_FOLDER);

        if (null == metadataResource) {
            log.error(
                    "cannot save hierarchical subjects for asset [{}], doesn't have metdata node.",
                    asset.getPath());
            return;
        }

        final ValueMap props = metadataResource.adaptTo(ValueMap.class);
        final String[] subjects = props.get(
                METADATA_PROPERTY_NAME_ADOBE_KEYWORDS, new String[0]);

        log.debug("got hierarchical subjects [{}] with content [{}].",
                METADATA_PROPERTY_NAME_ADOBE_KEYWORDS,
                StringUtils.join(subjects, ", "));

        if (subjects.length == 0) {
            return;
        }

        final TagManager tagManager = tagManagerFactory
                .getTagManager(session);
        // typed list so that toArray(Tag[]) yields a Tag[]
        final List<Tag> tags = new ArrayList<Tag>();

        for (final String subject : subjects) {

            // treat first tag as namespace and "merge" with second tag
            String titlePath = StringUtils.replaceOnce(subject, ":|",
                    ":");

            // convert separators to path
            titlePath = StringUtils.replace(titlePath, "|", "/");

            final Tag tag = tagManager.resolveByTitle(titlePath);
            if (null != tag) {
                log.debug("got tag [{}] from title path [{}].",
                        tag.getTagID(), titlePath);
                tags.add(tag);
            } else {
                log.warn("could not find tag from title path [{}].",
                        titlePath);
            }
        }

        if (!tags.isEmpty()) {
            try {
                log.debug("tagging [{}] with [{}] tags.",
                        asset.getPath(), tags.size());
                // NOTE(review): the third argument is passed straight from
                // asset.isBatchMode(); confirm this is the intended save
                // behaviour for batch mode.
                tagManager.setTags(metadataResource,
                        tags.toArray(new Tag[tags.size()]),
                        asset.isBatchMode());

            } catch (Exception e) {
                log.error(
                        "cannot save hierarchical subjects for asset [{}]: ",
                        asset.getPath(), e);
            }
        }
    }
        /**
         * Activation callback: reads the {@link #ENABLE_SHA1_GEN} setting
         * from the component properties, falling back to {@code true} when
         * it cannot be converted to a boolean.
         *
         * @param context
         *            component context providing the configuration
         */
        @Activate
        @SuppressWarnings("unused")
        protected void Actiate(final ComponentContext context) throws RepositoryException {
            final Object configured = context.getProperties().get(ENABLE_SHA1_GEN);
            sha1Enabled = OsgiUtil.toBoolean(configured, true);
        }

        /**
         * Builds the flat argument list of this step from the workflow
         * metadata. If a non-empty {@code PROCESS_ARGS} string is present it
         * is split on commas and returned as is (the 'old' way). Otherwise
         * the {@code COMMANDS}, {@code MIME_TYPES} and {@code THUMBNAILS}
         * values are read, in that order, and each value is prefixed with
         * the matching argument prefix ({@code cmd:}, {@code mime:},
         * {@code tn:}).
         *
         * @param metaData
         *            workflow step metadata
         * @return the arguments; empty if nothing is configured
         */
        public String[] buildArguments(MetaDataMap metaData) {

            // the 'old' way, ensures backward compatibility
            final String processArgs = metaData.get(Arguments.PROCESS_ARGS.name(), String.class);
            if (processArgs != null && !processArgs.equals("")) {
                return processArgs.split(",");
            }

            // typed list so that toArray(String[]) yields a String[]
            final List<String> arguments = new ArrayList<String>();
            appendPrefixed(arguments, Arguments.COMMANDS,
                    metaData.get(Arguments.COMMANDS.name(), String[].class));
            appendPrefixed(arguments, Arguments.MIME_TYPES,
                    metaData.get(Arguments.MIME_TYPES.name(), String[].class));
            appendPrefixed(arguments, Arguments.THUMBNAILS,
                    metaData.get(Arguments.THUMBNAILS.name(), String[].class));

            return arguments.toArray(new String[arguments.size()]);
        }

        /** Appends every value, prefixed with the argument's prefix, to {@code target}; {@code null} values array adds nothing. */
        private static void appendPrefixed(final List<String> target, final Arguments kind, final String[] values) {
            if (values == null) {
                return;
            }
            for (String value : values) {
                target.add(kind.getArgumentPrefix() + value);
            }
        }
}