/*
* Copyright 1997-2008 Day Management AG
* Barfuesserplatz 6, 4001 Basel, Switzerland
* All Rights Reserved.
*
* This software is the confidential and proprietary information of
* Day Management AG, ("Confidential Information"). You shall not
* disclose such Confidential Information and shall use it only in
* accordance with the terms of the license agreement you entered into
* with Day.
*/
package com.day.cq.dam.core.process;
import static com.day.cq.commons.jcr.JcrConstants.JCR_CONTENT;
import static com.day.cq.dam.api.DamConstants.DAM_SIZE;
import static com.day.cq.dam.api.DamConstants.DC_FORMAT;
import static com.day.cq.dam.api.DamConstants.METADATA_FOLDER;
import static com.day.cq.dam.api.DamConstants.PN_EXTRACTED;
import static com.day.cq.dam.api.DamConstants.PN_SHA1;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteStreamHandler;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ValueMap;
import org.apache.sling.commons.osgi.OsgiUtil;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.adobe.xmp.XMPConst;
import com.adobe.xmp.XMPMeta;
import com.adobe.xmp.XMPMetaFactory;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.Rendition;
import com.day.cq.dam.api.handler.AssetHandler;
import com.day.cq.dam.api.metadata.ExtractedMetadata;
import com.day.cq.dam.commons.metadata.SimpleXmpToJcrMetadataBuilder;
import com.day.cq.dam.commons.process.AbstractAssetWorkflowProcess;
import com.day.cq.tagging.JcrTagManagerFactory;
import com.day.cq.tagging.Tag;
import com.day.cq.tagging.TagManager;
import com.day.cq.workflow.WorkflowException;
import com.day.cq.workflow.WorkflowSession;
import com.day.cq.workflow.exec.WorkItem;
import com.day.cq.workflow.metadata.MetaDataMap;
/**
* The ExtractMetadataProcess is called in a workflow process step.
* This process will import metadata, if the payload of the step is an
* {@link com.day.cq.dam.api.Asset Asset} or a part of an
* {@link com.day.cq.dam.api.Asset Asset}.
*
* @see AbstractAssetWorkflowProcess
*/
@Component(metatype = true)
@Service
@Property(name = "process.label", value = "Exif ToolExtract Meta Data")
public class ExifToolExtractMetadataProcess extends AbstractAssetWorkflowProcess {
public enum Arguments {
PROCESS_ARGS("PROCESS_ARGS"),
/** Argument for the mime types to use */
MIME_TYPES("mime"),
/** Argument for thumb nail sizes */
THUMBNAILS("tn"),
/** Argument for command-line to execute */
COMMANDS("cmd");
private String argumentName;
Arguments(String argumentName) {
this.argumentName = argumentName;
}
public String getArgumentName() {
return this.argumentName;
}
public String getArgumentPrefix() {
return this.argumentName + ":";
}
}
private static final String JCR_CONTENT_JCR_DATA = "jcr:content/jcr:data";
private static final String METADATA_PROPERTY_NAME_ADOBE_KEYWORDS = "lr:hierarchicalSubject";
private static final String SYNC_FLAG = "newRendition";
@Reference
private JcrTagManagerFactory tagManagerFactory = null;
@Reference
private SimpleXmpToJcrMetadataBuilder metadataMetadataBuilderXmpTo;
/**
* Logger instance for this class.
*/
private static final Logger log = LoggerFactory
.getLogger(ExifToolExtractMetadataProcess.class);
private String defaultFormats[] = new String[] { "application/octet-stream" };
private boolean sha1Enabled = false;
@Property(boolValue = true)
public static final String ENABLE_SHA1_GEN = "cq.dam.enable.sha1";
public void execute(WorkItem workItem, WorkflowSession workflowSession,
MetaDataMap args) throws WorkflowException {
String[] arguments = buildArguments(args);
try {
final Session session = workflowSession.getSession();
final Asset asset = getAssetFromPayload(workItem, session);
if (null != asset) {
asset.setBatchMode(true);
final AssetHandler handler = getAssetHandler(asset.getMimeType());
if (null != handler) {
// Setting an extracted property to stop XMP Writeback
// process
final Node assetNode = asset.adaptTo(Node.class);
final Node content = assetNode.getNode(JCR_CONTENT);
if (!content.hasProperty(SYNC_FLAG)) {
content.setProperty(SYNC_FLAG, true);
}
final ExtractedMetadata metadata = new ExtractedMetadata();
File tmpDir = null;
InputStream is = null;
OutputStream os = null;
try {
// Process only specific mime types, based on arguments
final List mimeTypes = new LinkedList();
final String assetMimeType = asset.getMimeType();
for (String str : arguments) {
if (str.startsWith(Arguments.MIME_TYPES.getArgumentPrefix())) {
final String mt = str.substring(Arguments.MIME_TYPES.getArgumentPrefix().length()).trim();
log.debug("execute: accepted mime type [{}] for asset [{}].", mt, asset.getPath());
mimeTypes.add(mt);
}
}
if (!mimeTypes.contains(assetMimeType)) {
log.info("execute: mime type [{}] of asset [{}] is not in list of accepted mime types [" + mimeTypes
+ "], ignoring.", assetMimeType, asset.getPath());
return;
}
// creating temp directory
tmpDir = File.createTempFile("cqdam", null);
tmpDir.delete();
tmpDir.mkdir();
// make sure that tumbnails are not processed again, otherwise you
// will end in a endless recursion
if (null != asset) {
// getting the resource for the node
final Rendition original = asset.getOriginal();
// streaming file to temp directory
final File tmpFile = new File(tmpDir, asset.getName());
OutputStream fos = new FileOutputStream(tmpFile);
InputStream in = null;
try {
in = original.getStream();
IOUtils.copy(in, fos);
}
finally {
IOUtils.closeQuietly(in);
IOUtils.closeQuietly(fos);
}
// building command line
CommandLine commandLine;
String lastLine = "";
Map parameters = new HashMap();
parameters.put("filename", tmpFile.getName());
parameters.put("file", tmpFile.getAbsolutePath());
parameters.put("directory", tmpDir.getAbsolutePath());
parameters.put("basename", tmpFile.getName().replaceFirst("\\..*$", ""));
parameters.put("extension", tmpFile.getName().replaceFirst("^.*\\.", ""));
try {
for (String argument : arguments) {
if (argument.startsWith(Arguments.COMMANDS.getArgumentPrefix())) {
// Execute command line
final String cmd = argument.substring(Arguments.COMMANDS.getArgumentPrefix().length())
.trim();
commandLine = CommandLine.parse(cmd, parameters);
lastLine = commandLine.toString();
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
DefaultExecutor exec = new DefaultExecutor();
exec.setWorkingDirectory(tmpDir);
PumpStreamHandler streamHandler = new PumpStreamHandler(outputStream);
exec.setStreamHandler(streamHandler);
log.info("execute: executing command line [{}] for asset [{}].", lastLine, asset.getPath());
// No need to check the exit value, we get
// an Exception from the executor if it's not 0
exec.execute(commandLine);
String[] commands = new String[] { "exiftool", parameters.get("file"),">","output.txt" };
Runtime rt = Runtime.getRuntime();
//Process pr = rt.exec("cmd /c "+commandLine.toString());
String[] metalines = outputStream.toString().split("\\r?\\n");
//BufferedReader stdInput = new BufferedReader(new
// InputStreamReader(pr.getInputStream()));
// BufferedReader stdError = new BufferedReader(new
// InputStreamReader(pr.getErrorStream()));
// read the output from the command
System.out.println("Here is the standard output of the command:\n");
String s = null;
for (String st: metalines){
String[] items = st.split(":");
metadata.setMetaDataProperty(items[0].trim(), items[1].trim());
}
}
}
}catch(Exception e) {
log.error("execute: failed to execute command [{}] for asset [" + asset.getPath() + "]: ",
lastLine, e);
}
}
}catch(Exception e) {}
// set these properties always
metadata.setMetaDataProperty(PN_EXTRACTED, Calendar
.getInstance().getTime());
Rendition original = asset.getOriginal();
if(sha1Enabled) {
InputStream in = original.getStream();
try {
final String sha1 = DigestUtils.shaHex(in);
metadata.setMetaDataProperty(PN_SHA1, sha1);
}
finally {
IOUtils.closeQuietly(in);
}
}
//add size of the asset as property
Node originalBinary = original.adaptTo(Node.class);
if (originalBinary.hasProperty(JCR_CONTENT_JCR_DATA)) {
metadata.setMetaDataProperty(DAM_SIZE, originalBinary.getProperty(JCR_CONTENT_JCR_DATA).getBinary().getSize());
}
// recheck mime type respectively dc:format. it might happen
// that the extracted xmp data contains
// a wrong dc:format value. the correct dc:format value is
// required for the processing.
resetMimetype(asset, metadata);
saveMetadata(asset, metadata);
extractHierarchicalSubjects(asset, session);
} else {
log.error(
"execute: cannot extract metadata, no handler found for asset [{}] with mime type [{}]",
asset.getPath(), asset.getMimeType());
}
} else {
String wfPayload = workItem.getWorkflowData().getPayload()
.toString();
String message = "execute: cannot extract metadata, asset [{"
+ wfPayload
+ "}] in payload doesn't exist for workflow [{"
+ workItem.getId() + "}].";
throw new WorkflowException(message);
}
} catch (Exception e) {
log.warn(
"unexpected error occurred during metadata extraction. Cause: {}",
e.getMessage(), e);
}
}
protected void saveMetadata(final Asset asset,
final ExtractedMetadata metadata) {
final Resource resource = asset.adaptTo(Resource.class);
final ResourceResolver resolver = resource.getResourceResolver();
final Resource metadataResource = resolver.getResource(resource,
JCR_CONTENT + "/" + METADATA_FOLDER);
if (null != metadataResource) {
final Node metadataNode = metadataResource.adaptTo(Node.class);
try {
metadataMetadataBuilderXmpTo.storeAsXmp(metadata, metadataNode,
false);
// store additional xmp data available as xml stream
final InputStream is = metadata.getXmp();
if (is != null) {
final XMPMeta meta = XMPMetaFactory.parse(is);
/*
* if dc:format is already set, then don't over ride it.
* Required for the bugs 41829 & 42040. If the dc:format is
* default format such as "application/octet-stream" and
* there is dc:format in XMP, we should override it. *
* Skipping of default formats, such as
* application/octet-stream is requrired when we place files
* through Adobe Drive/WebDAV, AssetIOHandler uses
* org.apache.jackrabbit.server.io.ImportContext
* .getMimeType(), which returns application/octet-stream if
* the file format is not familiar/null. This gives the
* following priority to obtain the dc:format file extension
* > non-xmp Metadata > xmp Metadata.
*/
if (metadataNode.hasProperty(DC_FORMAT)
&& !isDefaultFormat(metadataNode
.getProperty(DC_FORMAT).getValue()
.getString())
&& meta.getPropertyString(XMPConst.NS_DC, DC_FORMAT) != null) {
meta.deleteProperty(XMPConst.NS_DC, DC_FORMAT);
}
metadataMetadataBuilderXmpTo.storeXmp(metadataNode, meta,
false);
}
} catch (Exception e) {
log.error(
"saveMetadata: error while saving metdata for asset [{}]: ",
asset.getPath(), e);
}
} else {
log.error(
"execute: cannot save metdata for asset [{}], doesn't have metdata node.",
asset.getPath());
}
}
private boolean isDefaultFormat(String format) {
for (int i = 0; i < defaultFormats.length; i++) {
if (defaultFormats[i].equals(format)) {
return true;
}
}
return false;
}
/**
* Resets the mimetype
*
* @param asset
* asset to read the mimetype
* @param metadata
* metadata to set the mimetype
*/
private void resetMimetype(Asset asset, ExtractedMetadata metadata) {
if (mimeTypeService.getMimeType(asset.getName()) != null) {
String mimeType = mimeTypeService.getMimeType(asset.getName());
metadata.setMetaDataProperty(DC_FORMAT, mimeType);
}
}
/**
* This method extracts hierarchical subjects from the respective XMP
* property ({@link #METADATA_PROPERTY_NAME_ADOBE_KEYWORDS} and attempts to
* map them to CQ tags. If tags are found, the asset's metadata node is
* tagged as such.
*
* @param asset
* The {@link Asset}
* @param session
* The session.
*/
private void extractHierarchicalSubjects(final Asset asset,
final Session session) {
final Resource resource = asset.adaptTo(Resource.class);
final ResourceResolver resolver = resource.getResourceResolver();
final Resource metadataResource = resolver.getResource(resource,
JCR_CONTENT + "/" + METADATA_FOLDER);
if (null != metadataResource) {
final ValueMap props = metadataResource.adaptTo(ValueMap.class);
final String[] subjects = props.get(
METADATA_PROPERTY_NAME_ADOBE_KEYWORDS, new String[0]);
log.debug("got hierarchical subjects [{}] with content [{}].",
METADATA_PROPERTY_NAME_ADOBE_KEYWORDS,
StringUtils.join(subjects, ", "));
if (subjects.length > 0) {
final TagManager tagManager = tagManagerFactory
.getTagManager(session);
final ArrayList tags = new ArrayList();
for (final String subject : subjects) {
// treat first tag as namespace and "merge" with second tag
String titlePath = StringUtils.replaceOnce(subject, ":|",
":");
// convert separators to path
titlePath = StringUtils.replace(titlePath, "|", "/");
final Tag tag = tagManager.resolveByTitle(titlePath);
if (null != tag) {
log.debug("got tag [{}] from title path [{}].",
tag.getTagID(), titlePath);
tags.add(tag);
} else {
log.warn("could not find tag from title path [{}].",
titlePath);
}
}
if (tags.size() > 0) {
try {
log.debug("tagging [{}] with [{}] tags.",
asset.getPath(), tags.size());
tagManager.setTags(metadataResource,
tags.toArray(new Tag[tags.size()]),
asset.isBatchMode());
} catch (Exception e) {
log.error(
"cannot save hierarchical subjects for asset [{}]: ",
asset.getPath(), e);
}
}
}
} else {
log.error(
"cannot save hierarchical subjects for asset [{}], doesn't have metdata node.",
asset.getPath());
}
}
@Activate
@SuppressWarnings("unused")
protected void Actiate(final ComponentContext context) throws RepositoryException {
sha1Enabled = OsgiUtil.toBoolean(context.getProperties().get(ENABLE_SHA1_GEN), true);
}
public String[] buildArguments(MetaDataMap metaData) {
// the 'old' way, ensures backward compatibility
String processArgs = metaData.get(Arguments.PROCESS_ARGS.name(), String.class);
if (processArgs != null && !processArgs.equals("")) {
return processArgs.split(",");
}
else {
List arguments = new ArrayList();
String[] commands = metaData.get(Arguments.COMMANDS.name(), String[].class);
if (commands != null) {
for (String command : commands) {
StringBuilder builder = new StringBuilder();
builder.append(Arguments.COMMANDS.getArgumentPrefix()).append(command);
arguments.add(builder.toString());
}
}
String[] mimetypes = metaData.get(Arguments.MIME_TYPES.name(), String[].class);
if (mimetypes != null) {
for (String mimetype : mimetypes) {
StringBuilder builder = new StringBuilder();
builder.append(Arguments.MIME_TYPES.getArgumentPrefix()).append(mimetype);
arguments.add(builder.toString());
}
}
String[] thumbnails = metaData.get(Arguments.THUMBNAILS.name(), String[].class);
if (thumbnails != null) {
for (String thumbnail : thumbnails) {
StringBuilder builder = new StringBuilder();
builder.append(Arguments.THUMBNAILS.getArgumentPrefix()).append(thumbnail);
arguments.add(builder.toString());
}
}
return arguments.toArray(new String[arguments.size()]);
}
}
}