com.lowagie.text.pdf.XfaForm Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of openpdf Show documentation
There is a newer version: 2.0.3
/*
 * $Id: XfaForm.java 4105 2009-11-27 12:52:57Z blowagie $
 *
 * Copyright 2006 Paulo Soares
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the License.
 *
 * The Original Code is 'iText, a free JAVA-PDF library'.
 *
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
 * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
 * All Rights Reserved.
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
 * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
 *
 * Contributor(s): all the names of the contributors are added in the source code
 * where applicable.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
 * provisions of LGPL are applicable instead of those above.  If you wish to
 * allow use of your version of this file only under the terms of the LGPL
 * License and not to allow others to use your version of this file under
 * the MPL, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the LGPL.
 * If you do not delete the provisions above, a recipient may use your version
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the MPL as stated above or under the terms of the GNU
 * Library General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
 * details.
 *
 * If you didn't download this code from the following link, you should check if
 * you aren't using an obsolete version:
 * https://github.com/LibrePDF/OpenPDF
 */

package com.lowagie.text.pdf;

import com.lowagie.text.xml.XmlDomWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EmptyStackException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Processes XFA forms.
 *
 * @author Paulo Soares
 */
public class XfaForm {

    /** Namespace URI used by {@code setNodeText} to look up and remove the {@code dataNode} attribute. */
    public static final String XFA_DATA_SCHEMA = "http://www.xfa.org/schema/xfa-data/1.0/";
    // SOM helper built by extractNodes() from the element whose local name is "template".
    private Xml2SomTemplate templateSom;
    // The "template" element itself, as found by extractNodes().
    private Node templateNode;
    // SOM helper built by extractNodes() from the first child of the "datasets" element.
    private Xml2SomDatasets datasetsSom;
    // The "datasets" element itself, as found by extractNodes().
    private Node datasetsNode;
    // Created lazily by findFieldName(); stays null until that method needs it.
    private AcroFieldsSearch acroFieldsSom;
    // Reader this form was loaded from (or assigned through setReader).
    private PdfReader reader;
    // True when the reader constructor found an XFA object; may be overridden with setXfaPresent.
    private boolean xfaPresent;
    // Parsed XFA XML; null until the reader constructor or setDomDocument supplies one.
    private org.w3c.dom.Document domDocument;
    // Set to true by setNodeText and fillXfaForm(Node); may be reset with setChanged.
    private boolean changed;

    /**
     * An empty constructor to build on. No reader or DOM document is attached and every field keeps its
     * default value; callers are expected to populate the instance through the setters.
     */
    public XfaForm() {
    }

    /**
     * Builds the form from a PdfReader: looks up the XFA object, gathers the bytes of its stream(s),
     * parses them as one XML document and extracts the template and datasets nodes.
     * <p>
     * When the reader holds no XFA object the instance is left with {@code xfaPresent == false} and
     * without a DOM document.
     *
     * @param reader the reader
     * @throws java.io.IOException                            on error
     * @throws javax.xml.parsers.ParserConfigurationException on error
     * @throws org.xml.sax.SAXException                       on error
     */
    public XfaForm(PdfReader reader) throws IOException, ParserConfigurationException, SAXException {
        this.reader = reader;
        PdfObject xfaObject = getXfaObject(reader);
        xfaPresent = xfaObject != null;
        if (!xfaPresent) {
            return;
        }

        // Collect the raw XML: either every stream found at an odd index of the array, or the single stream.
        ByteArrayOutputStream xml = new ByteArrayOutputStream();
        if (xfaObject.isArray()) {
            PdfArray packets = (PdfArray) xfaObject;
            int index = 1;
            while (index < packets.size()) {
                PdfObject candidate = packets.getDirectObject(index);
                if (candidate instanceof PRStream) {
                    xml.write(PdfReader.getStreamBytes((PRStream) candidate));
                }
                index += 2;
            }
        } else if (xfaObject instanceof PRStream) {
            xml.write(PdfReader.getStreamBytes((PRStream) xfaObject));
        }
        xml.close();

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        // Resolve every external entity to an empty source instead of fetching it.
        builder.setEntityResolver((publicId, systemId) -> new InputSource(new StringReader("")));
        byte[] xmlBytes = xml.toByteArray();
        domDocument = builder.parse(new ByteArrayInputStream(xmlBytes));
        extractNodes();
    }

    /**
     * Returns the XFA object referenced from the catalog's AcroForm dictionary. It may be an array or a
     * stream. Returns null if no AcroForm or no XFA object is present.
     *
     * @param reader a PdfReader instance
     * @return the XFA object, or null
     * @since 2.1.3
     */
    public static PdfObject getXfaObject(PdfReader reader) {
        PdfObject acroFormEntry = reader.getCatalog().get(PdfName.ACROFORM);
        PdfDictionary acroForm = (PdfDictionary) PdfReader.getPdfObjectReleaseNullConverting(acroFormEntry);
        return acroForm == null ? null : PdfReader.getPdfObjectRelease(acroForm.get(PdfName.XFA));
    }

    /**
     * Sets the XFA key of the reader's AcroForm from the content of {@code form}. The old XFA is erased.
     * <p>
     * When the existing XFA entry is an array that names both a "template" and a "datasets" packet, only
     * those two streams are replaced (from the form's template and datasets nodes). In every other case,
     * including an AcroForm that has no XFA entry yet, the form's whole DOM document is written as a
     * single stream. Nothing happens when the catalog has no AcroForm.
     *
     * @param form   the data
     * @param reader the reader
     * @param writer the writer
     * @throws java.io.IOException on error
     */
    public static void setXfa(XfaForm form, PdfReader reader, PdfWriter writer) throws IOException {
        // Same lookup as getXfaObject, so a null-object AcroForm is treated as absent rather than mis-cast.
        PdfDictionary af = (PdfDictionary) PdfReader.getPdfObjectReleaseNullConverting(
                reader.getCatalog().get(PdfName.ACROFORM));
        if (af == null) {
            return;
        }
        PdfObject xfa = getXfaObject(reader);
        // xfa is null when the AcroForm carries no XFA key; fall through and write a fresh stream.
        if (xfa != null && xfa.isArray()) {
            PdfArray ar = (PdfArray) xfa;
            int t = -1;
            int d = -1;
            // Names sit at even indexes, their streams right after; k + 1 < size guarantees the value exists.
            for (int k = 0; k + 1 < ar.size(); k += 2) {
                PdfString s = ar.getAsString(k);
                if (s == null) {
                    // Not a string entry: cannot be a packet name.
                    continue;
                }
                String packetName = s.toString();
                if ("template".equals(packetName)) {
                    t = k + 1;
                } else if ("datasets".equals(packetName)) {
                    d = k + 1;
                }
            }
            if (t > -1 && d > -1) {
                reader.killXref(ar.getAsIndirectObject(t));
                reader.killXref(ar.getAsIndirectObject(d));
                PdfStream tStream = new PdfStream(serializeDoc(form.templateNode));
                tStream.flateCompress(writer.getCompressionLevel());
                ar.set(t, writer.addToBody(tStream).getIndirectReference());
                PdfStream dStream = new PdfStream(serializeDoc(form.datasetsNode));
                dStream.flateCompress(writer.getCompressionLevel());
                ar.set(d, writer.addToBody(dStream).getIndirectReference());
                af.put(PdfName.XFA, new PdfArray(ar));
                return;
            }
        }
        // Single-stream form, or an array without both packets: replace the whole entry.
        reader.killXref(af.get(PdfName.XFA));
        PdfStream str = new PdfStream(serializeDoc(form.domDocument));
        str.flateCompress(writer.getCompressionLevel());
        PdfIndirectReference ref = writer.addToBody(str).getIndirectReference();
        af.put(PdfName.XFA, ref);
    }

    /**
     * Serializes an XML node (typically a whole document) to a byte array using a non-canonical
     * {@code XmlDomWriter}.
     *
     * @param n the XML document or node to write
     * @return the serialized XML
     * @throws java.io.IOException on error
     */
    public static byte[] serializeDoc(Node n) throws IOException {
        try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            XmlDomWriter domWriter = new XmlDomWriter();
            domWriter.setOutput(buffer, null);
            domWriter.setCanonical(false);
            domWriter.write(n);
            return buffer.toByteArray();
        }
    }

    /**
     * Gets all the text contained in the child nodes of this node. Text nodes are concatenated in
     * document order, descending into child elements; other node types are ignored.
     *
     * @param n the Node
     * @return the text found or "" if no text was found (also when {@code n} is null)
     */
    public static String getNodeText(Node n) {
        if (n == null) {
            return "";
        }
        return getNodeText(n, "");
    }

    /**
     * Returns {@code name} followed by all the text found below {@code n}.
     *
     * @param n    the node whose descendants are scanned
     * @param name the prefix to which the text is appended
     * @return the prefix plus the collected text
     */
    private static String getNodeText(Node n, String name) {
        // One shared builder for the whole subtree avoids re-copying the string at every text node.
        StringBuilder text = new StringBuilder().append(name);
        appendNodeText(n, text);
        return text.toString();
    }

    /**
     * Appends the value of every text node below {@code n} to {@code text}, in document order.
     *
     * @param n    the node whose children are visited
     * @param text the accumulator
     */
    private static void appendNodeText(Node n, StringBuilder text) {
        for (Node child = n.getFirstChild(); child != null; child = child.getNextSibling()) {
            short type = child.getNodeType();
            if (type == Node.ELEMENT_NODE) {
                appendNodeText(child, text);
            } else if (type == Node.TEXT_NODE) {
                text.append(child.getNodeValue());
            }
        }
    }

    /**
     * Extracts the nodes from the domDocument.
     * <p>
     * Skips top-level nodes without children, then scans the children of the first node that has some.
     * An element with local name "template" becomes the template node and one with local name
     * "datasets" becomes the datasets node; the matching SOM helpers are rebuilt. If no top-level node
     * has children, nothing is changed.
     *
     * @since 2.1.5
     */
    private void extractNodes() {
        Node n = domDocument.getFirstChild();
        // Stop at the end of the sibling list instead of dereferencing null when nothing has children.
        while (n != null && n.getChildNodes().getLength() == 0) {
            n = n.getNextSibling();
        }
        if (n == null) {
            return;
        }
        n = n.getFirstChild();
        while (n != null) {
            if (n.getNodeType() == Node.ELEMENT_NODE) {
                // getLocalName() can be null (e.g. nodes not built namespace-aware); compare null-safely.
                String s = n.getLocalName();
                if ("template".equals(s)) {
                    templateNode = n;
                    templateSom = new Xml2SomTemplate(n);
                } else if ("datasets".equals(s)) {
                    datasetsNode = n;
                    datasetsSom = new Xml2SomDatasets(n.getFirstChild());
                }
            }
            n = n.getNextSibling();
        }
    }

    /**
     * Sets the XFA key from the instance data. The old XFA is erased.
     * <p>
     * Delegates to {@link #setXfa(XfaForm, PdfReader, PdfWriter)} with this instance and its own reader.
     *
     * @param writer the writer
     * @throws java.io.IOException on error
     */
    public void setXfa(PdfWriter writer) throws IOException {
        setXfa(this, reader, writer);
    }

    /**
     * Returns true if it is a XFA form.
     * <p>
     * The flag is set by the reader constructor according to whether an XFA object was found, and can
     * be overridden with {@link #setXfaPresent(boolean)}.
     *
     * @return true if it is a XFA form
     */
    public boolean isXfaPresent() {
        return xfaPresent;
    }

    /**
     * Sets the XFA form flag signaling that this is a valid XFA form. Only the flag is changed; no
     * other state of this instance is touched.
     *
     * @param xfaPresent the XFA form flag signaling that this is a valid XFA form
     */
    public void setXfaPresent(boolean xfaPresent) {
        this.xfaPresent = xfaPresent;
    }

    /**
     * Gets the top level DOM document.
     *
     * @return the top level DOM document, or null if none has been parsed or set
     */
    public org.w3c.dom.Document getDomDocument() {
        return domDocument;
    }

    /**
     * Sets the top DOM document and re-extracts the template and datasets nodes from it.
     *
     * @param domDocument the top DOM document
     */
    public void setDomDocument(org.w3c.dom.Document domDocument) {
        this.domDocument = domDocument;
        // Refresh templateNode/datasetsNode and their SOM helpers for the new document.
        extractNodes();
    }

    /**
     * Resolves a complete or partial name to the complete field name used by the "classic" forms.
     * <p>
     * An exact match among the AcroFields wins. Otherwise a search helper is created on first use: from
     * the datasets names when there are no AcroFields and XFA is present, from the AcroFields names in
     * every other case. The helper is consulted first by short name and then by inverse search.
     *
     * @param name the complete or partial name
     * @param af   the fields
     * @return the complete name or null if not found
     */
    public String findFieldName(String name, AcroFields af) {
        Map fields = af.getAllFields();
        if (fields.containsKey(name)) {
            return name;
        }
        if (acroFieldsSom == null) {
            // Fall back to the datasets names only when there are no classic fields and XFA is present.
            if (!fields.isEmpty() || !xfaPresent) {
                acroFieldsSom = new AcroFieldsSearch(fields.keySet());
            } else {
                acroFieldsSom = new AcroFieldsSearch(datasetsSom.getNodesByName().keySet());
            }
        }
        boolean knownShortName = acroFieldsSom.getLongByShortNames().containsKey(name);
        return knownShortName
                ? acroFieldsSom.getLongByShortNames().get(name)
                : acroFieldsSom.inverseSearch(Xml2Som.splitParts(name));
    }

    /**
     * Resolves a possibly partial name to the complete SOM name stored in the datasets section.
     * An exact match is returned unchanged; otherwise the name is split into parts and searched
     * inversely.
     *
     * @param name the complete or partial name
     * @return the complete name or null if not found
     */
    public String findDatasetsName(String name) {
        boolean exactMatch = datasetsSom.getNodesByName().containsKey(name);
        return exactMatch ? name : datasetsSom.inverseSearch(Xml2Som.splitParts(name));
    }

    /**
     * Looks up the Node in the datasets section that corresponds to a possibly partial name.
     *
     * @param name the complete or partial name; null yields null
     * @return the Node or null if not found
     */
    public Node findDatasetsNode(String name) {
        String completeName = (name == null) ? null : findDatasetsName(name);
        return (completeName == null) ? null : datasetsSom.getNodesByName().get(completeName);
    }

    /**
     * Replaces the content of a node with a single text node. Every existing child is removed, the
     * {@code dataNode} attribute from the XFA data schema is dropped if present, and the form is marked
     * as changed. A null node is ignored.
     *
     * @param n    the Node to add the text to
     * @param text the text to add
     */
    public void setNodeText(Node n, String text) {
        if (n != null) {
            while (n.hasChildNodes()) {
                n.removeChild(n.getFirstChild());
            }
            boolean hasDataNodeAttribute = n.getAttributes().getNamedItemNS(XFA_DATA_SCHEMA, "dataNode") != null;
            if (hasDataNodeAttribute) {
                n.getAttributes().removeNamedItemNS(XFA_DATA_SCHEMA, "dataNode");
            }
            n.appendChild(domDocument.createTextNode(text));
            changed = true;
        }
    }

    /**
     * Gets the PdfReader used by this instance.
     *
     * @return the PdfReader used by this instance, or null if none was supplied
     */
    public PdfReader getReader() {
        return reader;
    }

    /**
     * Sets the PdfReader to be used by this instance. Only the reference is stored; the XFA content
     * already held by this instance is not re-read.
     *
     * @param reader the PdfReader to be used by this instance
     */
    public void setReader(PdfReader reader) {
        this.reader = reader;
    }

    /**
     * Checks if this XFA form was changed. The flag is raised by {@code setNodeText} and
     * {@code fillXfaForm(Node)}, and can be set explicitly with {@link #setChanged(boolean)}.
     *
     * @return true if this XFA form was changed
     */
    public boolean isChanged() {
        return changed;
    }

    /**
     * Sets the changed status of this XFA instance.
     *
     * @param changed the changed status of this XFA instance; pass false to clear the flag
     */
    public void setChanged(boolean changed) {
        this.changed = changed;
    }

    /**
     * Gets the class that contains the template processing section of the XFA.
     *
     * @return the class that contains the template processing section of the XFA, or null if no
     *         template element has been extracted and none was set
     */
    public Xml2SomTemplate getTemplateSom() {
        return templateSom;
    }

    /**
     * Sets the class that contains the template processing section of the XFA.
     *
     * @param templateSom the class that contains the template processing section of the XFA
     */
    public void setTemplateSom(Xml2SomTemplate templateSom) {
        this.templateSom = templateSom;
    }

    /**
     * Gets the class that contains the datasets processing section of the XFA.
     *
     * @return the class that contains the datasets processing section of the XFA, or null if no
     *         datasets element has been extracted and none was set
     */
    public Xml2SomDatasets getDatasetsSom() {
        return datasetsSom;
    }

    /**
     * Sets the class that contains the datasets processing section of the XFA. The stored datasets
     * node is not modified by this call.
     *
     * @param datasetsSom the class that contains the datasets processing section of the XFA
     */
    public void setDatasetsSom(Xml2SomDatasets datasetsSom) {
        this.datasetsSom = datasetsSom;
    }

    /**
     * Gets the class that contains the "classic" fields processing.
     *
     * @return the class that contains the "classic" fields processing; null until it has been created
     *         by {@code findFieldName} or assigned through the setter
     */
    public AcroFieldsSearch getAcroFieldsSom() {
        return acroFieldsSom;
    }

    /**
     * Sets the class that contains the "classic" fields processing. Passing null makes
     * {@code findFieldName} build a new one on its next lookup that needs it.
     *
     * @param acroFieldsSom the class that contains the "classic" fields processing
     */
    public void setAcroFieldsSom(AcroFieldsSearch acroFieldsSom) {
        this.acroFieldsSom = acroFieldsSom;
    }

    /**
     * Gets the Node that corresponds to the datasets part.
     *
     * @return the Node that corresponds to the datasets part, or null if none has been extracted
     */
    public Node getDatasetsNode() {
        return datasetsNode;
    }

    /**
     * Replaces the data under datasets/data with the XML read from a file. The file is opened, parsed
     * and closed again before this method returns, also when parsing fails.
     *
     * @param file the XML file holding the new data
     * @throws ParserConfigurationException on error
     * @throws SAXException                 on error
     * @throws IOException                  on error
     */
    public void fillXfaForm(File file) throws ParserConfigurationException, SAXException, IOException {
        // try-with-resources: the stream was previously left open.
        try (InputStream is = new FileInputStream(file)) {
            fillXfaForm(is);
        }
    }

    /**
     * Replaces the data under datasets/data with the XML read from a stream. The stream is wrapped in
     * an {@code InputSource} and handed to {@link #fillXfaForm(InputSource)}; this method does not
     * itself close it.
     *
     * @param is the stream holding the new data as XML
     * @throws ParserConfigurationException on error
     * @throws SAXException                 on error
     * @throws IOException                  on error
     */
    public void fillXfaForm(InputStream is) throws ParserConfigurationException, SAXException, IOException {
        fillXfaForm(new InputSource(is));
    }

    /**
     * Replaces the data under datasets/data with the XML read from an input source. The source is
     * parsed into a new document whose root element is passed to {@link #fillXfaForm(Node)}.
     *
     * @param is the source holding the new data as XML
     * @throws ParserConfigurationException on error
     * @throws SAXException                 on error
     * @throws IOException                  on error
     */
    public void fillXfaForm(InputSource is) throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        // Resolve every external entity to an empty source instead of fetching it.
        builder.setEntityResolver((publicId, systemId) -> new InputSource(new StringReader("")));
        fillXfaForm(builder.parse(is).getDocumentElement());
    }

    /**
     * Replaces the data under datasets/data.
     * <p>
     * A deep copy of {@code node} is imported into the form's document. It is appended when the first
     * child of the datasets node has no children, and otherwise takes the place of that node's first
     * child. The template and datasets nodes are then re-extracted and the form is marked as changed.
     *
     * @param node node to fill data under
     * @since iText 5.0.0
     */
    public void fillXfaForm(Node node) {
        Node dataRoot = datasetsNode.getFirstChild();
        boolean dataIsEmpty = !dataRoot.hasChildNodes();
        Node imported = domDocument.importNode(node, true);
        if (dataIsEmpty) {
            dataRoot.appendChild(imported);
        } else {
            dataRoot.replaceChild(imported, dataRoot.getFirstChild());
        }
        extractNodes();
        setChanged(true);
    }

    /**
     * A structure to store each part of a SOM name and link it to the next part beginning from the lower hierarchy.
     */
    public static class InverseStore {

        protected List part = new ArrayList<>();
        protected List