All downloads are free. Search and download functionality uses the official Maven repository.

com.xmlcalabash.extensions.Unzip Maven / Gradle / Ivy

The newest version!
package com.xmlcalabash.extensions;


import com.xmlcalabash.core.XMLCalabash;
import com.xmlcalabash.core.XProcConstants;
import com.xmlcalabash.core.XProcException;
import com.xmlcalabash.core.XProcRuntime;
import com.xmlcalabash.io.DataStore;
import com.xmlcalabash.io.DataStore.DataReader;
import com.xmlcalabash.io.WritablePipe;
import com.xmlcalabash.library.DefaultStep;
import com.xmlcalabash.runtime.XAtomicStep;
import com.xmlcalabash.util.Base64;
import com.xmlcalabash.util.TreeWriter;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XdmNode;
import org.xml.sax.InputSource;

import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URLEncoder;
import java.util.GregorianCalendar;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

/**
 * Implements the {@code pxp:unzip} extension step.
 *
 * <p>The step reads the ZIP archive named by the {@code href} option. When the
 * {@code file} option is absent it writes a {@code c:zipfile} document that
 * lists every entry of the archive. When {@code file} is present it extracts
 * that single entry: XML content types are parsed and written as a document,
 * anything else is wrapped in a {@code c:data} element, as decoded text when
 * the content type starts with {@code text/} and a {@code charset} option was
 * given, or as base64 otherwise.</p>
 *
 * @author ndw
 */

@XMLCalabash(
        name = "pxp:unzip",
        type = "{http://exproc.org/proposed/steps}unzip " +
                "{http://xmlcalabash.com/ns/extensions}unzip")

public class Unzip extends DefaultStep {
    private static final String ACCEPT_ZIP = "application/zip, */*";

    /** Number of characters decoded per chunk when an entry is stored as text. */
    private static final int TEXT_BUFFER_SIZE = 4096;

    /**
     * Number of bytes encoded per base64 chunk. It is a multiple of three so
     * that a completely filled chunk encodes without '=' padding.
     */
    private static final int BASE64_BUFFER_SIZE = 4096 * 3;

    protected final static QName _href = new QName("", "href");
    protected final static QName _content_type = new QName("", "content-type");
    protected final static QName c_zipfile = new QName("c", XProcConstants.NS_XPROC_STEP, "zipfile");
    protected final static QName c_file = new QName("c", XProcConstants.NS_XPROC_STEP, "file");
    protected final static QName _file = new QName("", "file");
    protected final static QName _charset = new QName("", "charset");
    protected final static QName _name = new QName("", "name");
    protected final static QName c_directory = new QName("c", XProcConstants.NS_XPROC_STEP, "directory");
    protected final static QName _compressed_size = new QName("", "compressed-size");
    protected final static QName _comment = new QName("", "comment");
    protected final static QName _size = new QName("", "size");
    protected final static QName _date = new QName("", "date");

    private WritablePipe result = null;
    private String zipFn = null;
    private URI zipURI = null;
    private String name = null;
    private String contentType = "application/xml";
    private String charset = null;

    /* Creates a new instance of Unzip */
    public Unzip(XProcRuntime runtime, XAtomicStep step) {
        super(runtime,step);
    }

    /**
     * Stores the pipe that receives the step's output. The port name is not
     * inspected; whichever pipe is passed last becomes the result pipe.
     *
     * @param port the name of the output port (ignored)
     * @param pipe the pipe to write results to
     */
    public void setOutput(String port, WritablePipe pipe) {
        result = pipe;
    }

    /** Resets the writer of the result pipe. */
    public void reset() {
        result.resetWriter();
    }

    /**
     * Reads the step options and processes the ZIP archive.
     *
     * <p>{@code href} supplies the archive location and the base URI it is
     * resolved against. {@code file}, {@code content-type} and {@code charset}
     * are only applied when they are set.</p>
     *
     * @throws SaxonApiException if the superclass {@code run()} fails
     * @throws XProcException with {@code err_E0001} if the archive cannot be
     *         read or no {@link DatatypeFactory} can be created
     */
    public void run() throws SaxonApiException {
        super.run();
        
        zipFn = getOption(_href).getString();
        zipURI = getOption(_href).getBaseURI();

        if (getOption(_file) != null) {
            name = getOption(_file).getString();
        }

        if (getOption(_content_type) != null) {
            contentType = getOption(_content_type).getString();
        }

        if (getOption(_charset) != null) {
            charset = getOption(_charset).getString();
        }

        try {
            final DatatypeFactory dfactory = DatatypeFactory.newInstance();
            DataStore store = runtime.getDataStore();
            String base = zipURI.toASCIIString();
            // The data store hands the archive content to the callback as a stream.
            store.readEntry(zipFn, base, ACCEPT_ZIP, null, new DataReader() {
                public void load(URI id, String media, InputStream content,
                        long len) throws IOException {
                    unzip(dfactory, id.toASCIIString(), content);
                }
            });
        } catch (MalformedURLException mue) {
            throw new XProcException(XProcException.err_E0001, mue);
        } catch (IOException ioe) {
            throw new XProcException(XProcException.err_E0001, ioe);
        } catch (DatatypeConfigurationException dce) {
            throw new XProcException(XProcException.err_E0001, dce);
        }
    }

    /**
     * Processes the archive read from {@code stream}: lists its entries when no
     * {@code file} option was given, otherwise extracts the named entry. The
     * stream is always closed before this method returns.
     *
     * @param dfactory factory used to format entry timestamps
     * @param systemId identifier of the archive, written to the listing's {@code href}
     * @param stream the raw archive content
     * @throws IOException if the archive cannot be read
     * @throws XProcException if the requested entry is not in the archive
     */
    void unzip(DatatypeFactory dfactory, String systemId, InputStream stream) throws IOException {
        ZipInputStream zipFile = new ZipInputStream(stream);

        try {
            if (name == null) {
                writeListing(dfactory, systemId, zipFile);
            } else {
                extractEntry(zipFile);
            }
        } finally {
            zipFile.close();
        }
    }

    /** Writes a c:zipfile document with one c:file or c:directory child per archive entry. */
    private void writeListing(DatatypeFactory dfactory, String systemId, ZipInputStream zipFile)
            throws IOException {
        TreeWriter tree = new TreeWriter(runtime);
        tree.startDocument(step.getNode().getBaseURI());
        tree.addStartElement(c_zipfile);
        tree.addAttribute(_href, systemId);
        tree.startContent();

        GregorianCalendar cal = new GregorianCalendar();

        for (ZipEntry entry = zipFile.getNextEntry(); entry != null; entry = zipFile.getNextEntry()) {
            cal.setTimeInMillis(entry.getTime());
            XMLGregorianCalendar xmlCal = dfactory.newXMLGregorianCalendar(cal);

            if (entry.isDirectory()) {
                tree.addStartElement(c_directory);
            } else {
                tree.addStartElement(c_file);

                // ZipEntry reports -1 for sizes that are not known.
                tree.addAttribute(_compressed_size, ""+entry.getCompressedSize());
                tree.addAttribute(_size, ""+entry.getSize());
            }

            if (entry.getComment() != null) {
                tree.addAttribute(_comment, entry.getComment());
            }

            tree.addAttribute(_name, ""+entry.getName());
            tree.addAttribute(_date, xmlCal.toXMLFormat());
            tree.startContent();
            tree.addEndElement();
        }

        tree.addEndElement();
        tree.endDocument();
        result.write(tree.getResult());
    }

    /** Locates the entry named by the file option and writes it as XML or as a c:data document. */
    private void extractEntry(ZipInputStream zipFile) throws IOException {
        // Advance the stream until it is positioned on the requested entry.
        ZipEntry entry = zipFile.getNextEntry();
        while (entry != null && !name.equals(entry.getName())) {
            entry = zipFile.getNextEntry();
        }

        if (entry == null) {
            throw new XProcException(step, "ZIP file does not contain '" + name + "'");
        }

        if ("application/xml".equals(contentType) || "text/xml".equals(contentType)
                || contentType.endsWith("+xml")) {
            InputSource isource = new InputSource(zipFile);
            XdmNode doc = runtime.parse(isource);
            result.write(doc);
            return;
        }

        // contentType was already dereferenced above, so it cannot be null here.
        boolean storeText = contentType.startsWith("text/") && charset != null;

        // There's no point giving the file the URI of the pipeline document.
        // This formulation is parallel to the jar scheme.
        URI entryURI = URI.create("zip:" + zipFn + "!" + URLEncoder.encode(entry.getName(), "UTF-8"));

        TreeWriter tree = new TreeWriter(runtime);
        tree.startDocument(entryURI);
        tree.addStartElement(XProcConstants.c_data);
        tree.addAttribute(_name,name);
        tree.addAttribute(_content_type, contentType);
        if (!storeText) {
            tree.addAttribute(_encoding, "base64");
        }
        tree.startContent();

        if (storeText) {
            copyText(tree, zipFile);
        } else {
            copyBase64(tree, zipFile);
        }

        tree.addEndElement();
        tree.endDocument();
        result.write(tree.getResult());
    }

    /** Decodes the stream with the configured charset and appends it to the tree as text. */
    private void copyText(TreeWriter tree, InputStream in) throws IOException {
        InputStreamReader reader = new InputStreamReader(in, charset);
        try {
            char[] chars = new char[TEXT_BUFFER_SIZE];
            int read = reader.read(chars, 0, chars.length);
            while (read >= 0) {
                if (read > 0) {
                    // Only the first 'read' chars are valid; the rest of the
                    // buffer still holds data from earlier iterations.
                    tree.addText(new String(chars, 0, read));
                }
                read = reader.read(chars, 0, chars.length);
            }
        } finally {
            reader.close();
        }
    }

    /** Appends the stream to the tree as newline-separated base64 chunks. */
    private void copyBase64(TreeWriter tree, InputStream in) throws IOException {
        BufferedInputStream bufstream = new BufferedInputStream(in);
        try {
            byte[] bytes = new byte[BASE64_BUFFER_SIZE];
            // Every chunk except the last is filled completely, so padding can
            // only appear at the very end of the encoded output.
            int read = readFully(bufstream, bytes);
            while (read > 0) {
                String base64 = Base64.encodeBytes(bytes, 0, read);
                tree.addText(base64 + "\n");
                read = readFully(bufstream, bytes);
            }
        } finally {
            bufstream.close();
        }
    }

    /** Reads until the buffer is full or the stream ends; returns the number of bytes read (0 at end of stream). */
    private static int readFully(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int read = in.read(buffer, total, buffer.length - total);
            if (read < 0) {
                break;
            }
            total += read;
        }
        return total;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy