All downloads are free. The search and download functionality uses the official Maven repository.

com.day.cq.dam.word.process.ExtractImagesProcess Maven / Gradle / Ivy

/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * ___________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/
package com.day.cq.dam.word.process;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.jcr.RepositoryException;

import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xwpf.usermodel.Document;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.osgi.framework.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.adobe.granite.workflow.exec.WorkflowProcess;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.commons.process.AbstractAssetWorkflowProcess;
import com.day.cq.workflow.WorkflowSession;
import com.day.cq.workflow.WorkflowException;
import com.day.cq.workflow.metadata.MetaDataMap;
import com.day.cq.workflow.exec.WorkItem;

/**
 * A {@link WorkflowProcess} for extracting images from a Word document.
 * 

* A workflow process that extracts all images from a Word document (.docx and .doc format) and adds them as sub-assets. * * @see WorkflowProcess */ @Component @Service @Properties({ @Property(name = Constants.SERVICE_DESCRIPTION, value = "Extracts images from a Word document and adds them to the DAM as sub-assets."), @Property(name = Constants.SERVICE_VENDOR, value = "Adobe"), @Property(name="process.label", value = "Extract Images From Word")}) public class ExtractImagesProcess extends AbstractAssetWorkflowProcess { private static final Logger log = LoggerFactory.getLogger(ExtractImagesProcess.class); private static final String BMP_MIME_TYPE = "image/bmp"; private static final String DIB_MIME_TYPE = "image/dib"; private static final String EMF_MIME_TYPE = "image/x-emf"; private static final String EPS_MIME_TYPE = "image/eps"; private static final String GIF_MIME_TYPE = "image/gif"; private static final String JPG_MIME_TYPE = "image/jpeg"; private static final String PICT_MIME_TYPE = "image/pict"; private static final String PNG_MIME_TYPE = "image/png"; private static final String WMF_MIME_TYPE = "image/wmf"; private static final String WPG_MIME_TYPE = "image/wpg"; private static Map mimeTypeMap = null; private void extractFromDoc(Asset asset, WorkflowSession session) throws WorkflowException { log.info("Extracting images from: " + asset.getPath()); boolean oldBatchMode = false; InputStream is = null; try { is = asset.getOriginal().getStream(); HWPFDocument doc = new HWPFDocument(is); oldBatchMode = asset.isBatchMode(); asset.setBatchMode(true); List pics = doc.getPicturesTable().getAllPictures(); log.debug("Found " + pics.size() + " images to extract."); Iterator picIter = pics.iterator(); while (picIter.hasNext()) { Picture pic = picIter.next(); String filename = pic.suggestFullFileName(); String mimeType = pic.getMimeType(); InputStream stream = new BufferedInputStream(new ByteArrayInputStream(pic.getRawContent())); asset.addSubAsset(filename, mimeType, 
stream); } session.getSession().save(); log.info("Done extracting images from: " + asset.getPath()); } catch (OLE2NotOfficeXmlFileException oe) { log.error("Error while extracting images from: " + asset.getPath(), oe); } catch (Throwable t) { throw new WorkflowException(t.getMessage(), t); } finally { try { session.getSession().refresh(false); } catch (RepositoryException e) { } if (asset != null) { asset.setBatchMode(oldBatchMode); } IOUtils.closeQuietly(is); } } private void extractFromDocx(Asset asset, WorkflowSession session) throws WorkflowException { log.info("Extracting images from: " + asset.getPath()); boolean oldBatchMode = false; InputStream is = null; try { is = asset.getOriginal().getStream(); XWPFDocument doc = new XWPFDocument(is); oldBatchMode = asset.isBatchMode(); asset.setBatchMode(true); List pics = doc.getAllPictures(); log.debug("Found " + pics.size() + " images to extract."); Iterator picIter = pics.iterator(); while (picIter.hasNext()) { XWPFPictureData pic = picIter.next(); String filename = pic.getFileName(); String mimeType = getMimeType(pic.getPictureType()); InputStream stream = new BufferedInputStream(new ByteArrayInputStream(pic.getData())); asset.addSubAsset(filename, mimeType, stream); } session.getSession().save(); log.info("Done extracting images from: " + asset.getPath()); } catch (OLE2NotOfficeXmlFileException oe) { log.error("Error while extracting images from: " + asset.getPath(), oe); } catch (Throwable t) { throw new WorkflowException(t.getMessage(), t); } finally { try { session.getSession().refresh(false); } catch (RepositoryException e) { } if (asset != null) { asset.setBatchMode(oldBatchMode); } IOUtils.closeQuietly(is); } } private String getMimeType(int picType) { if (mimeTypeMap == null) { mimeTypeMap = new HashMap(); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_BMP), BMP_MIME_TYPE); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_DIB), DIB_MIME_TYPE); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_EMF), 
EMF_MIME_TYPE); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_EPS), EPS_MIME_TYPE); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_GIF), GIF_MIME_TYPE); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_JPEG), JPG_MIME_TYPE); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_PICT), PICT_MIME_TYPE); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_PNG), PNG_MIME_TYPE); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_WMF), WMF_MIME_TYPE); mimeTypeMap.put(new Integer(Document.PICTURE_TYPE_WPG), WPG_MIME_TYPE); } String mimeType = mimeTypeMap.get(new Integer(picType)); if (mimeType == null) { //this should not happen, but just in case use a generic mime type mimeType = "application/octet-stream"; } return mimeType; } public void execute(WorkItem item,WorkflowSession session,MetaDataMap args) throws WorkflowException { Asset asset = getAssetFromPayload(item, session.getSession()); final String assetMime = asset.getMimeType(); if (assetMime.equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) { extractFromDocx(asset, session); } else if (assetMime.matches("application.*msword")) { extractFromDoc(asset, session); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy