All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.day.cq.dam.handler.standard.epub.EPubExtractor Maven / Gradle / Ivy

/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * __________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/

package com.day.cq.dam.handler.standard.epub;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.commons.io.IOUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.day.cq.dam.commons.xml.DocumentBuilderFactoryProvider;

/**
 * this class is required to override InputStream's close
 * the DOM parse seems to close the inputstream
 *
 * */

class NonCloseableInputStream extends InputStream {
    protected InputStream master;
    protected NonCloseableInputStream (InputStream in) {
        master = in;
    }
    public int read () throws IOException {
        return master.read ();
    }
}

/**
 * Extracts metadata and the cover image from an EPUB archive.
 * <p>
 * An EPUB is a zip archive containing an OPF package document; metadata is
 * read from the first {@code .opf} entry found. Not thread-safe state is
 * held, so a single instance may be shared.
 */

public class EPubExtractor {

    public EPubExtractor () throws Exception {
    }

    /**
     * Looks for the first {@code .opf} entry in the EPUB (zip) archive and
     * extracts the metadata found in it.
     *
     * @param in the raw EPUB stream; it is consumed and closed by this call
     * @return a map of metadata element local-names to their text content
     *         (plus {@code "_coverhref"} if a cover item is declared), or
     *         {@code null} if the archive contains no {@code .opf} entry
     * @throws Exception on I/O or XML parse failure
     */
    public Map<String, String> extractMetadata (InputStream in) throws Exception {
        // try-with-resources replaces the previous manual finally-close;
        // closing the ZipInputStream also closes the caller's stream, which
        // matches the original behavior.
        try (ZipInputStream zin = new ZipInputStream (in)) {
            ZipEntry e;
            while ( (e = zin.getNextEntry ()) != null) {
                if (e.getName ().endsWith (".opf")) {
                    return readMetadata (zin);
                }
            }
        }
        return null;
    }

    /**
     * Extracts the archive entry whose name ends with the given href
     * (typically the cover image referenced by {@code "_coverhref"}).
     *
     * @param in   the raw EPUB stream; it is consumed and closed by this call
     * @param href entry-name suffix to match
     * @return the entry's bytes, or {@code null} if no entry matches
     * @throws Exception on I/O failure
     */
    public byte[] extractImage (InputStream in, String href) throws Exception {
        try (ZipInputStream zin = new ZipInputStream (in)) {
            ZipEntry e;
            while ( (e = zin.getNextEntry ()) != null) {
                if (e.getName ().endsWith (href)) {
                    ByteArrayOutputStream out = new ByteArrayOutputStream ();
                    IOUtils.copy (zin, out);
                    return out.toByteArray ();
                }
            }
        }
        return null;
    }

    /**
     * Parses an OPF package document from the stream and collects its
     * metadata into a map.
     * <p>
     * The stream is wrapped in {@link NonCloseableInputStream} because the
     * DOM parser closes the stream it parses, which would close the
     * surrounding zip archive.
     *
     * @param in stream positioned at the start of the OPF document
     * @return metadata element names mapped to text content; if a
     *         {@code <meta name="cover" .../>} references a manifest item,
     *         that item's href is stored under {@code "_coverhref"}
     * @throws Exception on XML parse failure
     */
    protected Map<String, String> readMetadata (InputStream in) throws Exception {
        Map<String, String> rv = new HashMap<String, String> ();

        DocumentBuilderFactoryProvider factoryprovider = new DocumentBuilderFactoryProvider ();
        DocumentBuilderFactory dfactory = factoryprovider.createSecureBuilderFactory (true);
        DocumentBuilder builder = dfactory.newDocumentBuilder ();
        Document doc            = builder.parse (new NonCloseableInputStream (in));

        String covername = null;

        // Walk the direct children of the <package> root. The <manifest>
        // section is expected after <metadata> so the cover item id is known
        // by the time the manifest is scanned (standard OPF element order).
        NodeList ls = doc.getFirstChild ().getChildNodes ();
        for (int i = 0; i < ls.getLength (); i++) {
            Node n = ls.item (i);
            if (n.getNodeType () != Node.ELEMENT_NODE) {
                continue;
            }
            // Constant-first equals: getLocalName() returns null for nodes
            // from a non-namespace-aware parse, which previously could NPE.
            if ("metadata".equals (n.getLocalName ())) {
                String c = collectMetadata (n, rv);
                if (c != null) {
                    covername = c;
                }
            } else if ("manifest".equals (n.getLocalName ())) {
                resolveCoverHref (n, covername, rv);
            }
        }

        return rv;
    }

    /**
     * Collects the children of a {@code <metadata>} element into the map and
     * returns the cover item id declared by {@code <meta name="cover">}, if any.
     */
    private String collectMetadata (Node metadata, Map<String, String> rv) {
        String covername = null;
        NodeList mls = metadata.getChildNodes ();
        for (int z = 0; z < mls.getLength (); z++) {
            Node m = mls.item (z);
            if (m.getNodeType () != Node.ELEMENT_NODE) {
                continue;
            }
            if ("meta".equals (m.getLocalName ())) {
                // <meta name="cover" content="item-id"/> names the manifest
                // item that holds the cover image; last one wins.
                if ("cover".equals ( ( (Element) m).getAttribute ("name"))) {
                    covername = ( (Element) m).getAttribute ("content");
                }
            } else {
                // Ordinary Dublin Core style element, e.g. <dc:title>.
                rv.put (m.getLocalName (), m.getTextContent ());
            }
        }
        return covername;
    }

    /**
     * Scans a {@code <manifest>} element for the item whose id matches the
     * cover name and records its href under {@code "_coverhref"}.
     */
    private void resolveCoverHref (Node manifest, String covername, Map<String, String> rv) {
        NodeList mls = manifest.getChildNodes ();
        for (int z = 0; z < mls.getLength (); z++) {
            Node m = mls.item (z);
            if (m.getNodeType () == Node.ELEMENT_NODE && "item".equals (m.getLocalName ())) {
                String id = ( (Element) m).getAttribute ("id");
                // id is never null (getAttribute returns ""), so this is
                // safe even when no cover was declared (covername == null).
                if (id.equals (covername)) {
                    rv.put ("_coverhref", ( (Element) m).getAttribute ("href"));
                }
            }
        }
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy