
// com.day.cq.dam.handler.standard.ooxml.OpenOfficeHandler Maven / Gradle / Ivy
/*
* Copyright 1997-2011 Day Management AG
* Barfuesserplatz 6, 4001 Basel, Switzerland
* All Rights Reserved.
*
* This software is the confidential and proprietary information of
* Day Management AG, ("Confidential Information"). You shall not
* disclose such Confidential Information and shall use it only in
* accordance with the terms of the license agreement you entered into
* with Day.
*/
package com.day.cq.dam.handler.standard.ooxml;
import java.awt.Dimension;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.commons.io.IOUtils;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.ReferencePolicy;
import org.apache.felix.scr.annotations.Service;
import org.apache.sling.commons.mime.MimeTypeService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.NodeIterator;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.Rendition;
import com.day.cq.dam.api.metadata.ExtractedMetadata;
import com.day.cq.dam.commons.handler.AbstractAssetHandler;
import com.day.image.Layer;
import com.day.cq.dam.commons.xml.DocumentBuilderFactoryProvider;
/**
 * Open Office metadata handler service.
 * <p>
 * Handles assets whose mime type is one of {@link #MIMETYPES_SUPPORTED}. The
 * asset's original binary is read as a ZIP archive; the handler picks up the
 * metadata parts {@code docProps/core.xml} and {@code docProps/app.xml}, the
 * embedded JPEG thumbnail and the embedded EMF thumbnail.
 */
@Component(inherit = true, metatype = false)
@Service
public class OpenOfficeHandler extends AbstractAssetHandler {

    private static final Logger log = LoggerFactory.getLogger(OpenOfficeHandler.class);

    /** Mime type of word processing documents handled by this service. */
    public static final String MIMETYPE_DOCX
            = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

    /** Mime type of spreadsheet documents handled by this service. */
    public static final String MIMETYPE_XLSX
            = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    /** All mime types reported by {@link #getMimeTypes()}. */
    public static final String[] MIMETYPES_SUPPORTED = new String[] {MIMETYPE_DOCX, MIMETYPE_XLSX};

    // zip entries containing meta data
    private static final String ENTRY_CORE = "docProps/core.xml";
    private static final String ENTRY_APP = "docProps/app.xml";

    // TODO this needs to be tested with documents created on Windows (PNG vs JPEG).
    private static final String ENTRY_THUMBNAIL = "docProps/thumbnail.jpeg";
    private static final String ENTRY_THUMBNAIL_EMF = "docProps/thumbnail.emf";

    // key under which the raw bytes of the embedded JPEG thumbnail are kept in the extracted metadata
    private static final String META_KEY_THUMBNAIL = "thumbnail";

    @Reference(policy = ReferencePolicy.STATIC)
    protected MimeTypeService mimeTypeService;

    /**
     * Extracts the metadata stored inside the asset's original binary.
     * <p>
     * Extraction is best effort: any failure while reading the archive is
     * logged and whatever was collected up to that point is returned. As a
     * side effect, an embedded EMF thumbnail is added to the asset as the
     * rendition {@code thumbnail.emf}.
     *
     * @param asset the asset to read
     * @return the extracted metadata, never {@code null}
     */
    public ExtractedMetadata extractMetadata(final Asset asset) {
        final ExtractedMetadata metadata = new ExtractedMetadata();
        final Rendition original = asset.getOriginal();
        if (original == null) {
            // Without this guard the missing original escaped as a NullPointerException
            // instead of going through the same "log and carry on" path as other failures.
            log.warn("Failed to extract metadata for {} reason: {}", asset.getPath(),
                    "asset has no original rendition");
        } else {
            final ZipInputStream zis = new ZipInputStream(new BufferedInputStream(original.getStream()));
            try {
                readEntries(zis, metadata, asset);
            } catch (Exception e) {
                log.warn("Failed to extract metadata for {} reason: {}", asset.getPath(), e.getMessage());
                log.debug("Stack Trace: ", e);
            } finally {
                IOUtils.closeQuietly(zis);
            }
        }
        setMimetype(metadata, asset);
        return metadata;
    }

    /**
     * Returns the embedded thumbnail of the rendition's asset without any size constraint.
     *
     * @param rendition a rendition of the asset whose thumbnail is wanted
     * @return the thumbnail image, or {@code null} if the document embeds none
     * @throws IOException if the thumbnail cannot be decoded
     */
    public BufferedImage getImage(final Rendition rendition) throws IOException {
        return getImage(rendition, null);
    }

    /**
     * Returns the embedded thumbnail of the rendition's asset.
     * <p>
     * The image is taken from the asset's original document (via
     * {@link #extractMetadata(Asset)}), not from the passed rendition's own binary.
     *
     * @param rendition a rendition of the asset whose thumbnail is wanted
     * @param dim       dimension handed to the {@link Layer} that decodes the thumbnail; may be {@code null}
     * @return the thumbnail image, or {@code null} if the document embeds none
     * @throws IOException if the thumbnail cannot be decoded
     */
    public BufferedImage getImage(final Rendition rendition, final Dimension dim) throws IOException {
        final Asset asset = rendition.getAsset();
        final byte[] picture = (byte[]) extractMetadata(asset).getProperty(META_KEY_THUMBNAIL);
        // if we have an embedded image
        if (picture != null) {
            return new Layer(new ByteArrayInputStream(picture), dim).getImage();
        }
        // according to the spec, a thumbnail representation of a document should be generated by default when the
        // file is saved.
        log.warn("Failed to retrieve thumbnail for {}", asset.getPath());
        return null;
    }

    /**
     * {@inheritDoc}
     */
    public String[] getMimeTypes() {
        return MIMETYPES_SUPPORTED;
    }

    /* all private methods */

    /**
     * Walks all entries of the archive and dispatches the ones this handler knows about.
     *
     * @param zis      the document opened as a zip stream; not closed by this method
     * @param metadata receives the metadata properties and the JPEG thumbnail bytes
     * @param asset    receives the EMF thumbnail as an additional rendition
     * @throws Exception if an entry cannot be read, parsed or stored
     */
    private void readEntries(final ZipInputStream zis, final ExtractedMetadata metadata, final Asset asset)
            throws Exception {
        ZipEntry entry;
        while ((entry = zis.getNextEntry()) != null) {
            final String name = entry.getName();
            if (ENTRY_CORE.equals(name) || ENTRY_APP.equals(name)) {
                readMetadataPart(zis, metadata);
            } else if (ENTRY_THUMBNAIL.equals(name)) {
                metadata.setProperty(META_KEY_THUMBNAIL, IOUtils.toByteArray(zis));
            } else if (ENTRY_THUMBNAIL_EMF.equals(name)) {
                final String mimeType = mimeTypeService.getMimeType(name);
                final byte[] emf = IOUtils.toByteArray(zis);
                asset.addRendition("thumbnail.emf", new ByteArrayInputStream(emf), mimeType);
                // TODO Handle EMF format file in a better way.
                // An earlier attempt looked up the AssetHandler for the EMF mime type through the
                // AssetStore, rendered the rendition to a JPEG and stored it under META_KEY_THUMBNAIL,
                // but that created a circular reference with AssetStore and was disabled.
            }
        }
    }

    /**
     * Parses the current zip entry as XML and copies its elements into the metadata.
     * <p>
     * Every element except the first one returned by the iterator (the document
     * element) is stored as a property named after the element's local name, with
     * the element's text content as value.
     *
     * @param zis      zip stream positioned on an XML metadata entry
     * @param metadata receives one property per element
     * @throws Exception if the entry cannot be read or is not parseable XML
     */
    private void readMetadataPart(final ZipInputStream zis, final ExtractedMetadata metadata) throws Exception {
        // The entry is buffered and parsed from the copy, so the parser never touches the zip stream itself.
        final byte[] xml = IOUtils.toByteArray(zis);

        // build xml document to extract meta info
        // NOTE(review): the 'true' argument presumably enables namespace awareness, which
        // getLocalName() below relies on -- confirm against DocumentBuilderFactoryProvider.
        final DocumentBuilderFactory factory = new DocumentBuilderFactoryProvider().createSecureBuilderFactory(true);
        final Document document = factory.newDocumentBuilder().parse(new ByteArrayInputStream(xml));

        final NodeIterator elements = ((DocumentTraversal) document)
                .createNodeIterator(document, NodeFilter.SHOW_ELEMENT, null, true);
        elements.nextNode(); // skip first node
        Element element;
        while ((element = (Element) elements.nextNode()) != null) {
            metadata.setMetaDataProperty(element.getLocalName(), element.getTextContent());
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy