All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.xmlcalabash.extensions.Zip Maven / Gradle / Ivy

The newest version!
package com.xmlcalabash.extensions;


import com.xmlcalabash.core.XMLCalabash;
import com.xmlcalabash.core.XProcConstants;
import com.xmlcalabash.core.XProcException;
import com.xmlcalabash.core.XProcRuntime;
import com.xmlcalabash.io.DataStore;
import com.xmlcalabash.io.DataStore.DataInfo;
import com.xmlcalabash.io.DataStore.DataReader;
import com.xmlcalabash.io.ReadablePipe;
import com.xmlcalabash.io.WritablePipe;
import com.xmlcalabash.library.DefaultStep;
import com.xmlcalabash.runtime.XAtomicStep;
import com.xmlcalabash.util.AxisNodes;
import com.xmlcalabash.util.Base64;
import com.xmlcalabash.util.JSONtoXML;
import com.xmlcalabash.util.S9apiUtils;
import com.xmlcalabash.util.TreeWriter;
import com.xmlcalabash.util.XMLtoJSON;
import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmNodeKind;

import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.GregorianCalendar;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

/**
 *
 * @author ndw
 */

@XMLCalabash(
        name = "pxp:zip",
        type = "{http://exproc.org/proposed/steps}zip " +
                "{http://xmlcalabash.com/ns/extensions}zip")

public class Zip extends DefaultStep {
    // Option names read via getOption(): target archive and the command to run.
    protected final static QName _href = new QName("", "href");
    protected final static QName _name = new QName("", "name");
    protected final static QName _command = new QName("", "command");
    // Per-entry manifest attributes controlling how an entry is compressed.
    protected final static QName _compression_method = new QName("", "compression-method");
    protected final static QName _compression_level = new QName("", "compression-level");
    // Element names in the XProc step namespace: manifest input (c:zip-manifest,
    // c:entry) and the result listing (c:zipfile, c:file, c:directory).
    protected final static QName c_zip_manifest = new QName("c", XProcConstants.NS_XPROC_STEP, "zip-manifest");
    protected final static QName c_zipfile = new QName("c", XProcConstants.NS_XPROC_STEP, "zipfile");
    protected final static QName c_entry = new QName("c", XProcConstants.NS_XPROC_STEP, "entry");
    protected final static QName c_file = new QName("c", XProcConstants.NS_XPROC_STEP, "file");
    protected final static QName c_directory = new QName("c", XProcConstants.NS_XPROC_STEP, "directory");
    // Attributes written on c:file / c:directory in the result listing.
    protected final static QName _compressed_size = new QName("", "compressed-size");
    protected final static QName _comment = new QName("", "comment");
    protected final static QName _size = new QName("", "size");
    protected final static QName _date = new QName("", "date");
    // NOTE(review): _status_only, _detailed, _status and _value are not
    // referenced anywhere else in this class.
    private static final QName _status_only = new QName("status-only");
    private static final QName _detailed = new QName("detailed");
    private static final QName _status = new QName("status");
    private static final QName _value = new QName("value");
    // Names used by store() to decide how a source document is written.
    private static final QName c_encoding = new QName("c", XProcConstants.NS_XPROC_STEP, "encoding");
    private static final QName c_body = new QName("c", XProcConstants.NS_XPROC_STEP, "body");
    private static final QName c_json = new QName("c", XProcConstants.NS_XPROC_STEP, "json");
    private static final QName _content_type = new QName("content-type");
    // Size, in bytes, of the copy buffers used when moving entry content.
    private final static int bufsize = 8192;

    // Serialization attributes that may appear on a c:entry; FileToZip collects
    // the ones that are present and makeSerializer() applies them.
    // NOTE(review): these QNames are not declared in this file; presumably they
    // are inherited from DefaultStep -- confirm.
    private final static QName serializerAttrs[] = {
            _byte_order_mark,
            _cdata_section_elements,
            _doctype_public,
            _doctype_system,
            _encoding,
            _escape_uri_attributes,
            _include_content_type,
            _indent,
            _media_type,
            _method,
            _normalization_form,
            _omit_xml_declaration,
            _standalone,
            _undeclare_prefixes,
            _version
    };

    // Input and output pipes, assigned by setInput()/setOutput().
    private ReadablePipe source = null;
    private ReadablePipe manifest = null;
    private WritablePipe result = null;
    // Manifest entries keyed by the entry name inside the archive, in manifest order.
    // The type arguments are required: the methods below assign values taken
    // from these maps directly to FileToZip / XdmNode variables.
    private Map<String, FileToZip> zipManifest = new LinkedHashMap<String, FileToZip>();
    // Documents from the "source" port keyed by the ASCII form of their base URI.
    private Map<String, XdmNode> srcManifest = new LinkedHashMap<String, XdmNode>();

    /**
     * Creates a new instance of Zip.
     *
     * @param runtime the runtime, passed through to the superclass constructor
     * @param step the atomic step, passed through to the superclass constructor
     */
    public Zip(XProcRuntime runtime, XAtomicStep step) {
        super(runtime,step);
    }

    /**
     * Binds an input pipe. The port named "source" receives the documents that
     * may be zipped; a pipe for any other port name is taken as the manifest.
     *
     * @param port the input port name
     * @param pipe the pipe to read that port's documents from
     */
    public void setInput(String port, ReadablePipe pipe) {
        if (!"source".equals(port)) {
            manifest = pipe;
            return;
        }
        source = pipe;
    }

    /**
     * Binds the output pipe. The port name is ignored: whichever pipe is passed
     * becomes the result pipe.
     *
     * @param port the output port name (unused)
     * @param pipe the pipe the result document is written to
     */
    public void setOutput(String port, WritablePipe pipe) {
        this.result = pipe;
    }

    /**
     * Resets the readers of both input pipes and the writer of the result pipe.
     */
    public void reset() {
        this.source.resetReader();
        this.manifest.resetReader();
        this.result.resetWriter();
    }

    /**
     * Runs the step: reads and validates the manifest, indexes the source
     * documents by base URI, rewrites the archive named by the href option
     * according to the command option, then reads the archive back and writes a
     * c:zipfile listing of its entries to the result pipe.
     *
     * @throws SaxonApiException declared by the overridden method
     * @throws XProcException if the manifest root is not c:zip-manifest, or if
     *         reading or writing the archive fails
     * @throws NullPointerException if the manifest or a source document has no
     *         document element
     */
    public void run() throws SaxonApiException {
        super.run();

        final String zipFn = getOption(_href).getString();

        // Keep the document node: the root element is null precisely when the
        // error message needs the document URI, so it cannot be taken from it.
        XdmNode manifestDoc = manifest.read();
        XdmNode man = S9apiUtils.getDocumentElement(manifestDoc);
        if (man == null) {
            throw new NullPointerException("XML document " + manifestDoc.getDocumentURI() + " has no root element.");
        }

        if (!c_zip_manifest.equals(man.getNodeName())) {
            throw new XProcException(step, "The cx:zip manifest must be a c:zip-manifest.");
        }

        while (source.moreDocuments()) {
            XdmNode doc = source.read();
            XdmNode root = S9apiUtils.getDocumentElement(doc);
            if (root == null) {
                throw new NullPointerException("XML document " + doc.getDocumentURI() + " has no root element.");
            }
            srcManifest.put(root.getBaseURI().toASCIIString(), doc);
        }

        parseManifest(man);

        // Base URI against which the (possibly relative) href is resolved, used
        // for both writing the archive and reading it back.
        final String base = getOption(_href).getBaseURI().toASCIIString();

        try {
            final DataStore store = runtime.getDataStore();
            store.writeEntry(zipFn, base, "application/zip", new DataStore.DataWriter() {
                public void store(OutputStream content) throws IOException {
                    final ZipOutputStream outZip = new ZipOutputStream(content);
                    try {
                        // Feed the existing archive, if any, into the new one.
                        store.readEntry(zipFn, base, "application/zip", null, new DataStore.DataReader() {
                            public void load(URI id, String media, InputStream content, long len)
                                    throws IOException {
                                ZipInputStream inZip = new ZipInputStream(content);
                                try {
                                    update(inZip, outZip);
                                } finally {
                                    inZip.close();
                                }
                            }
                        });
                    } catch (FileNotFoundException e) {
                        // No existing archive: build the new one from scratch.
                        update(null, outZip);
                    } finally {
                        outZip.close();
                    }
                }
            });
        } catch (IOException e) {
            throw new XProcException(e);
        }

        try {
            final DatatypeFactory dfactory = DatatypeFactory.newInstance();
            DataStore store = runtime.getDataStore();
            // Resolve against the same base as the write above so that a
            // relative href names the archive that was just written.
            store.readEntry(zipFn, base, "application/zip, */*", null, new DataReader() {
                public void load(URI id, String media, InputStream stream, long len) throws IOException {
                    TreeWriter tree = new TreeWriter(runtime);

                    tree.startDocument(step.getNode().getBaseURI());
                    tree.addStartElement(c_zipfile);
                    tree.addAttribute(_href, id.toASCIIString());
                    tree.startContent();

                    if (zipFn.startsWith("file:/")) {
                        readFile(tree, id, zipFn, dfactory);
                    } else {
                        readStream(tree, id, stream, dfactory);
                    }

                    tree.addEndElement();
                    tree.endDocument();
                    result.write(tree.getResult());
                }
            });
        } catch (MalformedURLException mue) {
            throw new XProcException(XProcException.err_E0001, mue);
        } catch (IOException ioe) {
            throw new XProcException(XProcException.err_E0001, ioe);
        } catch (DatatypeConfigurationException dce) {
            throw new XProcException(XProcException.err_E0001, dce);
        }
    }

    /**
     * Dispatches on the command option. "create" closes and discards any
     * existing archive and then behaves like "update"; "freshen" updates with
     * the freshen flag set; "delete" removes the manifest's entries.
     *
     * @param inZip the existing archive, or null if there is none
     * @param outZip the archive being written
     * @throws XProcException if the command is not recognised or closing the
     *         existing archive fails
     */
    private void update(ZipInputStream inZip, final ZipOutputStream outZip) {
        final String cmd = getOption(_command).getString();
        final boolean creating = "create".equals(cmd);

        if (creating) {
            // A fresh archive ignores whatever was there before.
            if (inZip != null) {
                try {
                    inZip.close();
                } catch (IOException ioe) {
                    throw new XProcException(ioe);
                }
            }
            inZip = null;
        }

        if (creating || "update".equals(cmd)) {
            update(inZip, outZip, false);
            return;
        }

        if ("freshen".equals(cmd)) {
            update(inZip, outZip, true);
            return;
        }

        if ("delete".equals(cmd)) {
            delete(inZip, outZip);
            return;
        }

        throw new XProcException(step, "Unexpected cx:zip command: " + cmd);
    }

    /**
     * Lists the entries of an archive stored in the local file system by
     * opening it as a {@link ZipFile}.
     *
     * @param tree the writer receiving one element per entry
     * @param id the resolved archive URI (not used here)
     * @param zipFn the archive location; must be a URI that {@link File} accepts
     * @param dfactory factory used to format entry timestamps
     * @throws IOException if the archive cannot be opened, read or closed
     * @throws XProcException if zipFn is not a syntactically valid URI
     */
    private void readFile(TreeWriter tree, URI id, String zipFn, final DatatypeFactory dfactory)
            throws IOException {
        ZipFile zipFile = null;
        try {
            File uriFile = new File(new URI(zipFn));
            zipFile = new ZipFile(uriFile);

            // ZipFile.entries() is typed; a raw Enumeration would hand back
            // Object, which cannot be assigned to ZipEntry.
            Enumeration<? extends ZipEntry> zipEntryEnum = zipFile.entries();
            while (zipEntryEnum.hasMoreElements()) {
                ZipEntry entry = zipEntryEnum.nextElement();
                processEntry(tree, entry, dfactory);
            }
        } catch (URISyntaxException e) {
            throw new XProcException(e);
        } finally {
            if (zipFile != null) {
                zipFile.close();
            }
        }
    }

    /**
     * Lists the entries of an archive supplied as a stream. The stream is
     * closed before returning.
     *
     * @param tree the writer receiving one element per entry
     * @param id the resolved archive URI (not used here)
     * @param stream the raw archive bytes
     * @param dfactory factory used to format entry timestamps
     * @throws IOException if the archive cannot be read or closed
     */
    private void readStream(TreeWriter tree, URI id, InputStream stream, final DatatypeFactory dfactory)
            throws IOException {
        ZipInputStream zipStream = new ZipInputStream(stream);

        try {
            // getNextEntry() must be called on every iteration; fetching the
            // entry only once would loop forever on a non-empty archive.
            ZipEntry entry;
            while ((entry = zipStream.getNextEntry()) != null) {
                processEntry(tree, entry, dfactory);
            }
        } finally {
            zipStream.close();
        }
    }

    /**
     * Writes one empty c:directory or c:file element describing an archive
     * entry. Files carry compressed-size and size; every element carries name
     * and date, plus comment when the entry has one.
     *
     * @param tree the writer receiving the element
     * @param entry the archive entry to describe
     * @param dfactory factory used to format the entry's timestamp
     */
    private void processEntry(TreeWriter tree, ZipEntry entry, DatatypeFactory dfactory) {
        final GregorianCalendar modified = new GregorianCalendar();
        modified.setTimeInMillis(entry.getTime());
        final XMLGregorianCalendar stamp = dfactory.newXMLGregorianCalendar(modified);

        final boolean isDirectory = entry.isDirectory();
        tree.addStartElement(isDirectory ? c_directory : c_file);
        if (!isDirectory) {
            tree.addAttribute(_compressed_size, Long.toString(entry.getCompressedSize()));
            tree.addAttribute(_size, Long.toString(entry.getSize()));
        }

        final String entryComment = entry.getComment();
        if (entryComment != null) {
            tree.addAttribute(_comment, entryComment);
        }

        tree.addAttribute(_name, String.valueOf(entry.getName()));
        tree.addAttribute(_date, stamp.toXMLFormat());
        tree.startContent();
        tree.addEndElement();
    }

    /**
     * Reads the c:entry children of the manifest into zipManifest, keyed by
     * entry name. Each entry needs non-empty name and href attributes; href is
     * resolved against the entry's base URI. compression-method "stored"
     * selects STORED; compression-level accepts "smallest", "fastest",
     * "huffman" and "none" (which also selects STORED).
     *
     * @param man the c:zip-manifest element
     * @throws XProcException on a non-element child, an element other than
     *         c:entry, or a missing or empty name or href
     */
    private void parseManifest(XdmNode man) {
        for (XdmNode node : new AxisNodes(man, Axis.CHILD, AxisNodes.SIGNIFICANT)) {
            if (XdmNodeKind.ELEMENT != node.getNodeKind()) {
                throw new XProcException(step, "Unexpected content in cx:zip manifest.");
            }
            if (!c_entry.equals(node.getNodeName())) {
                throw new XProcException(step, "Unexpected element in cx:zip manifest: " + node.getNodeName());
            }

            final String entryName = node.getAttributeValue(_name);
            if (entryName == null || entryName.length() == 0) {
                throw new XProcException(step, "Missing or invalid name in cx:zip manifest.");
            }

            final String rawHref = node.getAttributeValue(_href);
            if (rawHref == null || rawHref.length() == 0) {
                throw new XProcException(step, "Missing or invalid href in cx:zip manifest.");
            }

            final String resolvedHref = node.getBaseURI().resolve(rawHref).toASCIIString();
            final String entryComment = node.getAttributeValue(_comment);

            final String methodName = node.getAttributeValue(_compression_method);
            int method = "stored".equals(methodName) ? ZipEntry.STORED : ZipEntry.DEFLATED;

            final String levelName = node.getAttributeValue(_compression_level);
            int level;
            if ("smallest".equals(levelName)) {
                level = Deflater.BEST_COMPRESSION;
            } else if ("fastest".equals(levelName)) {
                level = Deflater.BEST_SPEED;
            } else if ("huffman".equals(levelName)) {
                level = Deflater.HUFFMAN_ONLY;
            } else if ("none".equals(levelName)) {
                // No compression at all is expressed as a STORED entry.
                level = Deflater.NO_COMPRESSION;
                method = ZipEntry.STORED;
            } else {
                level = Deflater.DEFAULT_COMPRESSION;
            }

            zipManifest.put(entryName,
                    new FileToZip(entryName, resolvedHref, method, level, entryComment, node));
        }
    }

    /**
     * Copies the entries of an existing archive that are to be kept, then adds
     * every entry still listed in zipManifest.
     *
     * <p>An existing entry is dropped when its name is a key of srcManifest or
     * when the manifest replaces it. When freshening, a manifest entry replaces
     * the existing one only if its last-modified time is later than the
     * archived entry's time; otherwise the existing entry is kept and the
     * manifest entry is removed from zipManifest.
     *
     * <p>Content comes from the matching source document when srcManifest has
     * the entry's href, and from the data store otherwise.
     *
     * @param inZip the existing archive, or null if there is none
     * @param outZip the archive being written
     * @param freshen true to replace existing entries only with newer content
     * @throws XProcException wrapping any IOException or SaxonApiException
     */
    public void update(ZipInputStream inZip, final ZipOutputStream outZip, boolean freshen) {
        final byte[] buffer = new byte[bufsize];

        try {
            if (inZip != null) {
                ZipEntry entry;
                while ((entry = inZip.getNextEntry()) != null) {
                    String name = entry.getName();

                    boolean skip = srcManifest.containsKey(name);

                    if (!skip && zipManifest.containsKey(name)) {
                        if (freshen) {
                            FileToZip file = zipManifest.get(name);
                            skip = file.getLastModified() > entry.getTime();
                            if (!skip) {
                                // The archived copy is current; do not re-add it.
                                zipManifest.remove(name);
                            }
                        } else {
                            skip = true;
                        }
                    }

                    if (!skip) {
                        // Reset the compressed size: recompressing may yield a
                        // different size than the one recorded in the input.
                        ZipEntry copy = new ZipEntry(entry);
                        copy.setCompressedSize(-1);
                        outZip.putNextEntry(copy);
                        copyZipBytes(inZip, outZip, buffer);
                        outZip.closeEntry();
                    }
                }
            }

            CRC32 crc = new CRC32();

            for (String name : zipManifest.keySet()) {
                FileToZip file = zipManifest.get(name);
                ZipEntry ze = new ZipEntry(name);
                if (file.getComment() != null) {
                    ze.setComment(file.getComment());
                }
                ze.setMethod(file.getMethod());
                outZip.setLevel(file.getLevel());

                // srcManifest is keyed by the ASCII form of the URI, so the same
                // form is used for every lookup.
                final String href = file.getHref().toASCIIString();
                final XdmNode sourceDoc = srcManifest.get(href);

                // A STORED entry needs its size and CRC before putNextEntry(),
                // so its content is materialised first and written from memory.
                // FIXME: buffering in memory will fail for huge files; but who STOREs a huge file?
                byte[] storedBytes = null;
                if (ze.getMethod() == ZipEntry.STORED) {
                    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    if (sourceDoc != null) {
                        store(file, sourceDoc, baos);
                    } else {
                        DataStore store = runtime.getDataStore();
                        store.readEntry(href, href, "*/*", null, new DataReader() {
                            public void load(URI id, String media,
                                    InputStream stream, long len)
                                    throws IOException {
                                copyZipBytes(stream, baos, buffer);
                            }
                        });
                    }
                    storedBytes = baos.toByteArray();
                    ze.setSize(storedBytes.length);
                    crc.reset();
                    crc.update(storedBytes);
                    ze.setCrc(crc.getValue());
                }

                outZip.putNextEntry(ze);

                if (storedBytes != null) {
                    // Write exactly the bytes that were measured and checksummed.
                    outZip.write(storedBytes);
                } else if (sourceDoc != null) {
                    store(file, sourceDoc, outZip);
                } else {
                    DataStore store = runtime.getDataStore();
                    store.readEntry(href, href, "*/*", null, new DataReader() {
                        public void load(URI id, String media,
                                InputStream stream, long len)
                                throws IOException {
                            copyZipBytes(stream, outZip, buffer);
                        }
                    });
                }

                outZip.closeEntry();
            }
        } catch (IOException ioe) {
            throw new XProcException(ioe);
        } catch (SaxonApiException sae) {
            throw new XProcException(sae);
        }
    }

    /** Copies everything remaining in {@code in} to {@code out} through {@code buffer}. */
    private void copyZipBytes(InputStream in, OutputStream out, byte[] buffer) throws IOException {
        int read = in.read(buffer, 0, buffer.length);
        while (read >= 0) {
            out.write(buffer, 0, read);
            read = in.read(buffer, 0, buffer.length);
        }
    }

    /**
     * Copies every entry of the existing archive whose name is not listed in
     * zipManifest. Does nothing when there is no existing archive.
     *
     * @param inZip the existing archive, or null if there is none
     * @param outZip the archive being written
     * @throws XProcException wrapping any IOException
     */
    public void delete(ZipInputStream inZip, ZipOutputStream outZip) {
        if (inZip == null) {
            return;
        }

        final byte[] buffer = new byte[bufsize];
        try {
            ZipEntry entry;
            while ((entry = inZip.getNextEntry()) != null) {
                if (zipManifest.containsKey(entry.getName())) {
                    continue;
                }

                // As in update(): copy the entry and reset its compressed size,
                // because recompressing may not reproduce the recorded size.
                ZipEntry copy = new ZipEntry(entry);
                copy.setCompressedSize(-1);
                outZip.putNextEntry(copy);
                int read = inZip.read(buffer, 0, bufsize);
                while (read >= 0) {
                    outZip.write(buffer, 0, read);
                    read = inZip.read(buffer, 0, bufsize);
                }
                outZip.closeEntry();
            }
        } catch (IOException ioe) {
            throw new XProcException(ioe);
        }
    }

    /**
     * One manifest entry: where its content comes from, how it is to be
     * compressed, and the serialization attributes found on its c:entry.
     * This is an inner class because it reads the enclosing step's srcManifest
     * and runtime.
     */
    private class FileToZip {
        private String zipName = null;
        private URI href = null;
        private String origHref = null;
        private int method = -1;
        private int level = -1;
        private String comment = null;
        private long lastModified = -1;
        // Serialization attributes present on the entry; null when there are none.
        private Hashtable<QName,String> options = null;

        /**
         * @param zipName the entry name inside the archive
         * @param href the resolved URI of the content, as a string
         * @param method the ZipEntry compression method
         * @param level the Deflater compression level
         * @param comment the entry comment, or null
         * @param entry the c:entry element, read for serialization attributes
         * @throws XProcException if href is not a valid URI or the content's
         *         timestamp cannot be read
         */
        public FileToZip(String zipName, String href, int method, int level, String comment, XdmNode entry) {
            try {
                origHref = href;
                this.zipName = zipName;
                this.href = new URI(href);
                this.method = method;
                this.level = level;
                this.comment = comment;

                lastModified = readLastModified(this.href);

                // FIXME: There's no validation here...
                for (QName attr : serializerAttrs) {
                    String value = entry.getAttributeValue(attr);
                    if (value != null) {
                        if (options == null) {
                            options = new Hashtable<QName,String>();
                        }
                        options.put(attr, value);
                    }
                }
            } catch (URISyntaxException use) {
                throw new XProcException(use);
            }
        }

        public String getName() {
            return zipName;
        }

        public URI getHref() {
            return href;
        }

        public int getMethod() {
            return method;
        }

        public int getLevel() {
            return level;
        }

        public String getComment() {
            return comment;
        }

        public long getLastModified() {
            return lastModified;
        }

        /** @return the serialization attributes of the entry, or null if it had none */
        public Hashtable<QName,String> getOptions() {
            return options;
        }

        /**
         * Determines the content's last-modified time in milliseconds: "now"
         * for a document supplied on the source port, otherwise whatever the
         * data store reports, or -1 if it did not report exactly one value.
         */
        private long readLastModified(URI uri) {
            if (srcManifest.containsKey(origHref)) {
                // If the document to be zipped is in the set of source documents,
                // don't try to read its timestamp from the disk or the web.
                // Use "now".
                return System.currentTimeMillis();
            }

            // The callback cannot assign a local, so it reports through a list.
            final List<Long> list = new ArrayList<Long>(1);
            DataStore store = runtime.getDataStore();
            try {
                store.infoEntry(uri.toASCIIString(), uri.toASCIIString(), "*/*", new DataInfo() {
                    public void list(URI id, String media, long lastModified)
                            throws IOException {
                        list.add(lastModified);
                    }
                });
            } catch (IOException e) {
                throw new XProcException(e);
            }
            if (list.size() == 1) {
                return list.get(0);
            } else {
                return -1;
            }
        }
    }

    /**
     * Writes a source document to {@code out}, choosing the form from its root
     * element: base64-encoded content is decoded to binary, a c:result with a
     * text/* content type is written as text, JSON-shaped documents are
     * written as JSON when the runtime has transparent JSON enabled, and
     * anything else is serialized as XML.
     *
     * @param file the manifest entry the document belongs to
     * @param doc the document to write
     * @param out the destination stream
     * @throws SaxonApiException if XML serialization fails
     * @throws IOException if writing fails
     */
    protected void store(FileToZip file, XdmNode doc, OutputStream out) throws SaxonApiException, IOException {
        final XdmNode root = S9apiUtils.getDocumentElement(doc);
        final QName rootName = root.getNodeName();
        final String rootNs = rootName.getNamespaceURI();

        // Base64 is flagged by @encoding on step-namespace elements and by
        // @c:encoding on elements in no namespace.
        final boolean base64InStepNs = XProcConstants.NS_XPROC_STEP.equals(rootNs)
                && "base64".equals(root.getAttributeValue(_encoding));
        final boolean base64InNoNs = "".equals(rootNs)
                && "base64".equals(root.getAttributeValue(c_encoding));
        if (base64InStepNs || base64InNoNs) {
            storeBinary(file, doc, out);
            return;
        }

        final String contentType = root.getAttributeValue(_content_type);
        if (XProcConstants.c_result.equals(rootName)
                && contentType != null
                && contentType.startsWith("text/")) {
            storeText(file, doc, out);
            return;
        }

        if (runtime.transparentJSON()) {
            final boolean jsonBody = c_body.equals(rootName)
                    && ("application/json".equals(contentType) || "text/json".equals(contentType));
            final boolean jsonVocabulary = JSONtoXML.JSONX_NS.equals(rootNs)
                    || JSONtoXML.JXML_NS.equals(rootNs)
                    || JSONtoXML.MLJS_NS.equals(rootNs);
            if (jsonBody || c_json.equals(rootName) || jsonVocabulary) {
                storeJSON(file, doc, out);
                return;
            }
        }

        storeXML(file, doc, out);
    }

    /**
     * Decodes the document's string value as base64 and writes the resulting
     * bytes to {@code out}.
     *
     * @param file the manifest entry (unused)
     * @param doc the document whose string value is base64 text
     * @param out the destination stream
     * @throws IOException if writing fails
     */
    public void storeBinary(FileToZip file, XdmNode doc, OutputStream out) throws IOException {
        final String encoded = doc.getStringValue();
        out.write(Base64.decode(encoded));
    }

    /**
     * Writes the document's string value to {@code out} as UTF-8 text.
     *
     * <p>The encoding is fixed so that the archived bytes do not depend on the
     * platform default charset; storeJSON() in this class also writes UTF-8.
     *
     * @param file the manifest entry (unused)
     * @param doc the document whose string value is written
     * @param out the destination stream
     * @throws IOException if writing fails
     */
    public void storeText(FileToZip file, XdmNode doc, OutputStream out) throws IOException {
        out.write(doc.getStringValue().getBytes("UTF-8"));
    }

    /**
     * Converts the document to JSON and writes it to {@code out} as UTF-8.
     *
     * <p>The writer is flushed but deliberately not closed: {@code out} is
     * owned by the caller, which in update() is the archive stream itself and
     * must stay open for closeEntry() and any following entries.
     *
     * @param file the manifest entry (unused)
     * @param doc the document to convert
     * @param out the destination stream; left open
     * @throws XProcException if UTF-8 is unavailable or writing fails
     */
    public void storeJSON(FileToZip file, XdmNode doc, OutputStream out) {
        final PrintWriter writer;
        try {
            writer = new PrintWriter(new OutputStreamWriter(out, "UTF-8"));
        } catch (UnsupportedEncodingException e) {
            // Every Java platform must support UTF-8; surface it rather than
            // continuing with a null writer.
            throw new XProcException(e);
        }

        writer.print(XMLtoJSON.convert(doc));
        // PrintWriter swallows IOExceptions; checkError() flushes and reports them.
        if (writer.checkError()) {
            throw new XProcException(step, "Failed to write JSON to zip entry.");
        }
    }

    /**
     * Serializes the document as XML to {@code out}, using a serializer
     * configured from the entry's serialization options.
     *
     * @param file the manifest entry supplying the options
     * @param doc the document to serialize
     * @param out the destination stream
     * @throws SaxonApiException if serialization fails
     */
    public void storeXML(FileToZip file, XdmNode doc, OutputStream out) throws SaxonApiException {
        final Serializer xmlSerializer = makeSerializer(file.getOptions());
        xmlSerializer.setOutputStream(out);
        S9apiUtils.serialize(runtime, doc, xmlSerializer);
    }

    /**
     * Creates a serializer and applies the given serialization options to it.
     * Boolean-valued options are translated to "yes" (for "true") or "no".
     * cdata-section-elements names are resolved against the step's node and
     * passed on in Clark notation.
     *
     * @param options serialization attribute values keyed by attribute name, or
     *        null for a serializer with default settings
     * @return the configured serializer
     */
    public Serializer makeSerializer(Hashtable<QName,String> options) {
        Serializer serializer = runtime.getProcessor().newSerializer();

        if (options == null) {
            return serializer;
        }

        if (options.containsKey(_byte_order_mark)) {
            // "true" requests a byte order mark, like every other boolean option.
            serializer.setOutputProperty(Serializer.Property.BYTE_ORDER_MARK, toYesNo(options.get(_byte_order_mark)));
        }

        if (options.containsKey(_cdata_section_elements)) {
            String list = options.get(_cdata_section_elements);

            // FIXME: Why is list="" sometimes?
            if (!"".equals(list)) {
                StringBuilder clarkNames = new StringBuilder();
                for (String name : list.split("\\s+")) {
                    QName q = new QName(name, step.getNode());
                    clarkNames.append(q.getClarkName()).append(' ');
                }

                serializer.setOutputProperty(Serializer.Property.CDATA_SECTION_ELEMENTS, clarkNames.toString());
            }
        }

        if (options.containsKey(_doctype_public)) {
            serializer.setOutputProperty(Serializer.Property.DOCTYPE_PUBLIC, options.get(_doctype_public));
        }

        if (options.containsKey(_doctype_system)) {
            serializer.setOutputProperty(Serializer.Property.DOCTYPE_SYSTEM, options.get(_doctype_system));
        }

        if (options.containsKey(_encoding)) {
            serializer.setOutputProperty(Serializer.Property.ENCODING, options.get(_encoding));
        }

        if (options.containsKey(_escape_uri_attributes)) {
            serializer.setOutputProperty(Serializer.Property.ESCAPE_URI_ATTRIBUTES, toYesNo(options.get(_escape_uri_attributes)));
        }

        if (options.containsKey(_include_content_type)) {
            serializer.setOutputProperty(Serializer.Property.INCLUDE_CONTENT_TYPE, toYesNo(options.get(_include_content_type)));
        }

        if (options.containsKey(_indent)) {
            serializer.setOutputProperty(Serializer.Property.INDENT, toYesNo(options.get(_indent)));
        }

        if (options.containsKey(_media_type)) {
            serializer.setOutputProperty(Serializer.Property.MEDIA_TYPE, options.get(_media_type));
        }

        if (options.containsKey(_method)) {
            serializer.setOutputProperty(Serializer.Property.METHOD, options.get(_method));
        }

        if (options.containsKey(_normalization_form)) {
            serializer.setOutputProperty(Serializer.Property.NORMALIZATION_FORM, options.get(_normalization_form));
        }

        if (options.containsKey(_omit_xml_declaration)) {
            serializer.setOutputProperty(Serializer.Property.OMIT_XML_DECLARATION, toYesNo(options.get(_omit_xml_declaration)));
        }

        if (options.containsKey(_standalone)) {
            String standalone = options.get(_standalone);
            if ("true".equals(standalone)) {
                serializer.setOutputProperty(Serializer.Property.STANDALONE, "yes");
            } else if ("false".equals(standalone)) {
                serializer.setOutputProperty(Serializer.Property.STANDALONE, "no");
            }
            // What about omit?
        }

        if (options.containsKey(_undeclare_prefixes)) {
            serializer.setOutputProperty(Serializer.Property.UNDECLARE_PREFIXES, toYesNo(options.get(_undeclare_prefixes)));
        }

        if (options.containsKey(_version)) {
            serializer.setOutputProperty(Serializer.Property.VERSION, options.get(_version));
        }

        return serializer;
    }

    /** Maps the string "true" to "yes" and any other value to "no". */
    private static String toYesNo(String value) {
        return "true".equals(value) ? "yes" : "no";
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy