com.itextpdf.forms.xfa.XfaForm Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of forms Show documentation
The newest version!
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2024 Apryse Group NV
    Authors: Apryse Software.

    This program is offered under a commercial and under the AGPL license.
    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.

    AGPL licensing:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package com.itextpdf.forms.xfa;

import com.itextpdf.commons.utils.FileUtil;
import com.itextpdf.forms.PdfAcroForm;
import com.itextpdf.forms.fields.PdfFormCreator;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.pdf.PdfArray;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfVersion;
import com.itextpdf.kernel.pdf.VersionConforming;
import com.itextpdf.kernel.utils.XmlProcessorCreator;
import com.itextpdf.kernel.xmp.XmlDomWriter;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Processes XFA forms.
 */
public class XfaForm {

    /** Initial capacity (16 KiB) of the in-memory buffer used when serializing XML nodes. */
    private static final int INIT_SERIALIZER_BUFFER_SIZE = 16 * 1024;

    /** The "template" packet node, assigned when the DOM document contains one. */
    private Node templateNode;
    /** SOM view built over the datasets packet; stays {@code null} when the document has no datasets packet. */
    private Xml2SomDatasets datasetsSom;
    /** The "datasets" packet node; a new one is created when the document lacks it. */
    private Node datasetsNode;
    /** Lazily created search helper over the names known to {@link #datasetsSom}. */
    private AcroFieldsSearch acroFieldsSom;
    /** Becomes {@code true} once XFA XML has been parsed from an input stream or a PDF object. */
    private boolean xfaPresent = false;
    /** The top level DOM document that holds all XFA packets. */
    private org.w3c.dom.Document domDocument;

    /**
     * The URI for the XFA Data schema.
     */
    public static final String XFA_DATA_SCHEMA = "http://www.xfa.org/schema/xfa-data/1.0/";

    /**
     * Creates a form by delegating to {@link #XfaForm(InputStream)} with a zero-length stream.
     */
    public XfaForm() {
        this(new ByteArrayInputStream(new byte[0]));
    }

    /**
     * Creates an XFA form from a stream that contains the complete XFA XML.
     *
     * @param inputStream the stream to parse
     * @throws PdfException wrapping any exception raised while the stream is parsed
     */
    public XfaForm(InputStream inputStream) {
        try {
            initXfaForm(inputStream);
        } catch (Exception parseFailure) {
            throw new PdfException(parseFailure.getMessage(), parseFailure);
        }
    }

    /**
     * Creates an XFA form from an already parsed {@link Document}.
     * <p>
     * This constructor only installs the document; it does not switch the
     * {@link #isXfaPresent()} flag on.
     *
     * @param domDocument the DOM document containing all XFA XML
     */
    public XfaForm(Document domDocument) {
        this.setDomDocument(domDocument);
    }

    /**
     * A constructor from a {@link PdfDictionary}. It is assumed, but not
     * necessary for correct initialization, that the dictionary is actually a
     * {@link PdfAcroForm}. An entry in the dictionary with the XFA
     * key must contain correct XFA syntax. If the XFA key is
     * absent, then the constructor essentially does nothing.
     *
     * @param acroFormDictionary the dictionary object to initialize from
     */
    public XfaForm(PdfDictionary acroFormDictionary) {
        PdfObject xfa = acroFormDictionary.get(PdfName.XFA);
        if (xfa != null) {
            try {
                initXfaForm(xfa);
            } catch (Exception e) {
                throw new PdfException(e.getMessage(), e);
            }
        }
    }

    /**
     * A constructor from a PdfDocument. It basically does everything
     * from finding the XFA stream to the XML parsing.
     *
     * @param pdfDocument the PdfDocument instance
     */
    public XfaForm(PdfDocument pdfDocument) {
        PdfObject xfa = getXfaObject(pdfDocument);
        if (xfa != null) {
            try {
                initXfaForm(xfa);
            } catch (Exception e) {
                throw new PdfException(e.getMessage(), e);
            }
        }
    }

    /**
     * Stores the given XFA form under the XFA key of the document's AcroForm,
     * which is obtained through {@link PdfFormCreator#getAcroForm(PdfDocument, boolean)}
     * with the boolean flag set to {@code true}. The previous XFA content is replaced.
     *
     * @param form        the XFA form to store
     * @param pdfDocument the document whose AcroForm receives the XFA form
     * @throws java.io.IOException if any I/O issue occurs
     */
    public static void setXfaForm(XfaForm form, PdfDocument pdfDocument) throws IOException {
        setXfaForm(form, PdfFormCreator.getAcroForm(pdfDocument, true));
    }

    /**
     * Stores the given XFA form under the XFA key of an AcroForm, replacing the old XFA content.
     * <p>
     * When the existing XFA entry is an array that names both a "template" and a
     * "datasets" packet, only those two packet streams are replaced. In every other
     * case the whole XFA entry is replaced by a single stream serialized from the
     * form's DOM document.
     * <p>
     * Nothing is written when the version check for deprecated features reports
     * an error for the document.
     *
     * @param form     the XFA form to store
     * @param acroForm an {@link PdfAcroForm} instance
     * @throws java.io.IOException      if any I/O issue occurs
     * @throws IllegalArgumentException if the form, the AcroForm or the AcroForm's document is {@code null}
     */
    public static void setXfaForm(XfaForm form, PdfAcroForm acroForm) throws IOException {
        if (form == null || acroForm == null || acroForm.getPdfDocument() == null) {
            throw new IllegalArgumentException("XfaForm, PdfAcroForm and PdfAcroForm's document shall not be null");
        }
        PdfDocument document = acroForm.getPdfDocument();
        if (VersionConforming.validatePdfVersionForDeprecatedFeatureLogError(document, PdfVersion.PDF_2_0, VersionConforming.DEPRECATED_XFA_FORMS)) {
            return;
        }
        PdfObject xfa = getXfaObject(acroForm);
        if (xfa != null && xfa.isArray()) {
            PdfArray ar = (PdfArray) xfa;
            int t = -1;
            int d = -1;
            // The array is laid out as (name, stream) pairs. Only look at names that
            // actually have a value slot, so a trailing unpaired name cannot produce
            // an out-of-range index.
            for (int k = 0; k + 1 < ar.size(); k += 2) {
                PdfString packetName = ar.getAsString(k);
                if (packetName == null) {
                    // Not a string entry: skip it instead of failing with a NullPointerException.
                    continue;
                }
                String name = packetName.toString();
                if ("template".equals(name)) {
                    t = k + 1;
                } else if ("datasets".equals(name)) {
                    d = k + 1;
                }
            }
            if (t > -1 && d > -1) {
                PdfStream tStream = new PdfStream(serializeDocument(form.templateNode));
                tStream.setCompressionLevel(document.getWriter().getCompressionLevel());
                ar.set(t, tStream);
                PdfStream dStream = new PdfStream(serializeDocument(form.datasetsNode));
                dStream.setCompressionLevel(document.getWriter().getCompressionLevel());
                ar.set(d, dStream);
                ar.setModified();
                ar.flush();
                acroForm.put(PdfName.XFA, new PdfArray(ar));
                acroForm.setModified();
                if (!acroForm.getPdfObject().isIndirect()) {
                    // A direct AcroForm dictionary is embedded in the catalog, so the catalog is marked instead.
                    document.getCatalog().setModified();
                }
                return;
            }
        }
        PdfStream stream = new PdfStream(serializeDocument(form.domDocument));
        stream.setCompressionLevel(document.getWriter().getCompressionLevel());
        stream.flush();
        acroForm.put(PdfName.XFA, stream);
        acroForm.setModified();
        if (!acroForm.getPdfObject().isIndirect()) {
            // A direct AcroForm dictionary is embedded in the catalog, so the catalog is marked instead.
            document.getCatalog().setModified();
        }
    }

    /**
     * Extracts DOM nodes from an XFA document.
     * <p>
     * The first top-level node that has children is treated as the packet
     * container; each of its element children is stored under its local name.
     * When the document has no top-level node with children, an empty map is
     * returned.
     *
     * @param domDocument an XFA file as a {@link org.w3c.dom.Document DOM
     *                    document}
     * @return a {@link Map} of XFA packet names and their associated
     * {@link org.w3c.dom.Node DOM nodes}; never {@code null}
     */
    public static Map<String, Node> extractXFANodes(Document domDocument) {
        Map<String, Node> xfaNodes = new HashMap<>();
        Node container = domDocument.getFirstChild();
        // Skip childless top-level nodes; stop at the end of the sibling list
        // instead of dereferencing null.
        while (container != null && container.getChildNodes().getLength() == 0) {
            container = container.getNextSibling();
        }
        if (container == null) {
            return xfaNodes;
        }
        for (Node packet = container.getFirstChild(); packet != null; packet = packet.getNextSibling()) {
            if (packet.getNodeType() == Node.ELEMENT_NODE) {
                xfaNodes.put(packet.getLocalName(), packet);
            }
        }
        return xfaNodes;
    }

    /**
     * Writes this XfaForm to the provided {@link PdfDocument} by delegating to
     * {@link #setXfaForm(XfaForm, PdfDocument)}.
     *
     * @param document the PdfDocument to write the XFA Form to
     * @throws IOException if any I/O issue occurs
     */
    public void write(PdfDocument document) throws IOException {
        setXfaForm(this, document);
    }

    /**
     * Writes this XfaForm to the provided {@link PdfAcroForm} by delegating to
     * {@link #setXfaForm(XfaForm, PdfAcroForm)}.
     *
     * @param acroForm the PdfAcroForm to write the XFA Form to
     * @throws IOException if any I/O issue occurs
     */
    public void write(PdfAcroForm acroForm) throws IOException {
        setXfaForm(this, acroForm);
    }

    /**
     * Changes a field value in the XFA form. Does nothing when XFA is not
     * present or when the field name cannot be resolved. If the resolved field
     * has no node in the datasets yet, one is inserted first.
     *
     * @param name  the name of the field to be changed
     * @param value the new value
     */
    public void setXfaFieldValue(String name, String value) {
        if (!isXfaPresent()) {
            return;
        }
        String fullName = findFieldName(name);
        if (fullName == null) {
            return;
        }
        String shortName = Xml2Som.getShortName(fullName);
        Node target = findDatasetsNode(shortName);
        if (target == null) {
            target = datasetsSom.insertNode(getDatasetsNode(), shortName);
        }
        setNodeText(target, value);
    }

    /**
     * Gets the value of an XFA field.
     *
     * @param name the fully qualified field name
     * @return the field value, or {@code null} when XFA is not present or the
     * field name cannot be resolved
     */
    public String getXfaFieldValue(String name) {
        if (!isXfaPresent()) {
            return null;
        }
        String fullName = findFieldName(name);
        if (fullName == null) {
            return null;
        }
        String shortName = Xml2Som.getShortName(fullName);
        return XfaForm.getNodeText(findDatasetsNode(shortName));
    }

    /**
     * Tells whether XFA content has been loaded into this form.
     *
     * @return true if it is a XFA form
     */
    public boolean isXfaPresent() {
        return this.xfaPresent;
    }

    /**
     * Finds the complete field name from a partial name. The search helper is
     * created on first use from the names known to the datasets SOM.
     *
     * @param name the complete or partial name
     * @return the complete name or null if not found
     */
    public String findFieldName(String name) {
        if (!xfaPresent) {
            return null;
        }
        if (acroFieldsSom == null) {
            if (datasetsSom == null) {
                // Nothing to build the search helper from.
                return null;
            }
            acroFieldsSom = new AcroFieldsSearch(datasetsSom.getName2Node().keySet());
        }
        if (acroFieldsSom.getAcroShort2LongName().containsKey(name)) {
            return acroFieldsSom.getAcroShort2LongName().get(name);
        }
        return acroFieldsSom.inverseSearchGlobal(Xml2Som.splitParts(name));
    }

    /**
     * Finds the complete SOM name contained in the datasets section from a
     * possibly partial name.
     *
     * @param name the complete or partial name
     * @return the complete name or null if not found; also null when no
     * datasets SOM has been built for this form
     */
    public String findDatasetsName(String name) {
        if (datasetsSom == null) {
            // Without a datasets SOM there is nothing to search, so report "not found".
            return null;
        }
        if (datasetsSom.getName2Node().containsKey(name)) {
            return name;
        }
        return datasetsSom.inverseSearchGlobal(Xml2Som.splitParts(name));
    }

    /**
     * Finds the Node contained in the datasets section from a
     * possibly partial name.
     *
     * @param name the complete or partial name
     * @return the Node or null if the name is null or cannot be resolved
     */
    public Node findDatasetsNode(String name) {
        if (name == null) {
            return null;
        }
        String resolvedName = findDatasetsName(name);
        if (resolvedName == null) {
            return null;
        }
        return datasetsSom.getName2Node().get(resolvedName);
    }

    /**
     * Gets all the text contained in the child nodes of this node.
     *
     * @param n the Node, may be {@code null}
     * @return the text found or "" if no text was found
     */
    public static String getNodeText(Node n) {
        if (n == null) {
            return "";
        }
        return getNodeText(n, "");
    }

    /**
     * Gets all the text contained in the child nodes of the node under the provided path.
     * The lookup is done over a SOM built from the whole DOM document.
     *
     * @param path path to the node to extract text in the format "some.path.to.node"
     *
     * @return text found under the provided path or {@code null} if XFA is not present
     * or the path cannot be resolved
     */
    public String getNodeTextByPath(String path) {
        if (!xfaPresent) {
            return null;
        }
        Xml2SomDatasets documentSom = new Xml2SomDatasets(domDocument);
        AcroFieldsSearch documentSearch = new AcroFieldsSearch(documentSom.getName2Node().keySet());

        String resolvedPath = documentSearch.inverseSearchGlobal(Xml2Som.splitParts(path));
        if (resolvedPath == null) {
            return null;
        }
        return XfaForm.getNodeText(documentSom.getName2Node().get(resolvedPath));
    }

    /**
     * Sets the text of this node. All existing children are removed, the
     * {@code dataNode} attribute in the XFA data namespace is dropped when
     * present, and a single new text child is appended.
     *
     * @param n    the Node to add the text to; nothing happens when it is {@code null}
     * @param text the text to add
     */
    public void setNodeText(Node n, String text) {
        if (n == null) {
            return;
        }
        while (n.hasChildNodes()) {
            n.removeChild(n.getFirstChild());
        }
        boolean hasDataNodeAttribute = n.getAttributes().getNamedItemNS(XFA_DATA_SCHEMA, "dataNode") != null;
        if (hasDataNodeAttribute) {
            n.getAttributes().removeNamedItemNS(XFA_DATA_SCHEMA, "dataNode");
        }
        n.appendChild(domDocument.createTextNode(text));
    }

    /**
     * Returns the top level DOM document backing this form.
     *
     * @return the top level DOM document
     */
    public Document getDomDocument() {
        return this.domDocument;
    }

    /**
     * Installs the top DOM document and re-extracts the packet nodes from it.
     *
     * @param domDocument the top DOM document
     */
    public void setDomDocument(org.w3c.dom.Document domDocument) {
        this.domDocument = domDocument;
        this.extractNodes();
    }

    /**
     * Returns the node of the datasets packet.
     *
     * @return the Node that corresponds to the datasets part
     */
    public Node getDatasetsNode() {
        return this.datasetsNode;
    }

    /**
     * Replaces the XFA data under datasets/data with the XML read from a
     * {@link File}. Equivalent to {@link #fillXfaForm(File, boolean)} with
     * {@code readOnly} set to {@code false}.
     *
     * @param file the {@link File}
     * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
     */
    public void fillXfaForm(File file) throws IOException {
        this.fillXfaForm(file, false);
    }

    /**
     * Replaces the XFA data under datasets/data. Accepts a {@link File file
     * object} to fill this object with XFA data. The stream opened on the file
     * is closed before this method returns.
     *
     * @param file     the {@link File}
     * @param readOnly whether the form fields should be marked read-only
     * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
     */
    public void fillXfaForm(File file, boolean readOnly) throws IOException {
        // The stream is created here, so it must also be released here.
        try (InputStream fileStream = FileUtil.getInputStreamForFile(file)) {
            fillXfaForm(fileStream, readOnly);
        }
    }

    /**
     * Replaces the XFA data under datasets/data with the XML read from an
     * {@link InputStream}. Equivalent to {@link #fillXfaForm(InputStream, boolean)}
     * with {@code readOnly} set to {@code false}.
     *
     * @param is the {@link InputStream}
     * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
     */
    public void fillXfaForm(InputStream is) throws IOException {
        this.fillXfaForm(is, false);
    }

    /**
     * Replaces the XFA data under datasets/data with the XML read from an
     * {@link InputStream}, which is wrapped in an {@link InputSource}.
     *
     * @param is       the {@link InputStream}
     * @param readOnly whether the form fields should be marked read-only
     * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
     */
    public void fillXfaForm(InputStream is, boolean readOnly) throws IOException {
        InputSource source = new InputSource(is);
        fillXfaForm(source, readOnly);
    }

    /**
     * Replaces the XFA data under datasets/data with the XML read from a
     * {@link InputSource SAX input source}. Equivalent to
     * {@link #fillXfaForm(InputSource, boolean)} with {@code readOnly} set to
     * {@code false}.
     *
     * @param is the {@link InputSource SAX input source}
     * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
     */
    public void fillXfaForm(InputSource is) throws IOException {
        this.fillXfaForm(is, false);
    }

    /**
     * Replaces the XFA data under datasets/data. The {@link InputSource SAX
     * input source} is parsed into a DOM document and its document element is
     * used as the new data.
     *
     * @param is       the {@link InputSource SAX input source}
     * @param readOnly whether the form fields should be marked read-only
     * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
     * @throws PdfException        wrapping a {@link SAXException} raised while parsing
     */
    public void fillXfaForm(InputSource is, boolean readOnly) throws IOException {
        try {
            Document parsed = XmlProcessorCreator.createSafeDocumentBuilder(false, false).parse(is);
            Node dataRoot = parsed.getDocumentElement();
            fillXfaForm(dataRoot, readOnly);
        } catch (SAXException parseFailure) {
            throw new PdfException(parseFailure.getMessage(), parseFailure);
        }
    }

    /**
     * Replaces the XFA data under datasets/data. Equivalent to
     * {@link #fillXfaForm(Node, boolean)} with {@code readOnly} set to
     * {@code false}.
     *
     * @param node the input {@link org.w3c.dom.Node}
     */
    public void fillXfaForm(Node node) {
        this.fillXfaForm(node, false);
    }

    /**
     * Replaces the XFA data under datasets/data.
     * <p>
     * The {@code data} element in the XFA data namespace is looked up among the
     * children of the datasets node and created when missing. A copy of
     * {@code node} is imported into the form's document; it replaces the first
     * element child of {@code data}, or is appended when {@code data} has no
     * element child. Afterwards the packet nodes are extracted again.
     *
     * @param node     the input {@link org.w3c.dom.Node}
     * @param readOnly when {@code true}, every {@code field} element of the document
     *                 gets its {@code access} attribute set to {@code readOnly}
     */
    public void fillXfaForm(Node node, boolean readOnly) {
        if (readOnly) {
            NodeList fields = domDocument.getElementsByTagName("field");
            for (int i = 0; i < fields.getLength(); i++) {
                ((Element) fields.item(i)).setAttribute("access", "readOnly");
            }
        }

        Node data = null;
        NodeList datasetsChildren = datasetsNode.getChildNodes();
        for (int k = 0; k < datasetsChildren.getLength(); ++k) {
            Node candidate = datasetsChildren.item(k);
            // getLocalName() is null for nodes created without namespace support,
            // so compare with the literal on the left-hand side.
            if (candidate.getNodeType() == Node.ELEMENT_NODE
                    && "data".equals(candidate.getLocalName())
                    && XFA_DATA_SCHEMA.equals(candidate.getNamespaceURI())) {
                data = candidate;
                break;
            }
        }
        if (data == null) {
            data = datasetsNode.getOwnerDocument().createElementNS(XFA_DATA_SCHEMA, "xfa:data");
            datasetsNode.appendChild(data);
        }

        Node imported = domDocument.importNode(node, true);
        // The first child of data may be a text node (e.g. whitespace) rather than an
        // element, so the element to replace is searched for explicitly.
        Node firstElement = getFirstElementNode(data);
        if (firstElement != null) {
            data.replaceChild(imported, firstElement);
        } else {
            // data is empty or holds only non-element children: add the new data
            // instead of silently dropping it.
            data.appendChild(imported);
        }
        extractNodes();
    }

    /**
     * Appends to {@code name} the values of all text nodes found below
     * {@code n}, visiting element children recursively in document order.
     * Nodes of other types are skipped.
     *
     * @param n    the node whose descendants are scanned
     * @param name the text collected so far
     * @return {@code name} followed by the text found
     */
    private static String getNodeText(Node n, String name) {
        String collected = name;
        for (Node child = n.getFirstChild(); child != null; child = child.getNextSibling()) {
            switch (child.getNodeType()) {
                case Node.ELEMENT_NODE:
                    collected = getNodeText(child, collected);
                    break;
                case Node.TEXT_NODE:
                    collected += child.getNodeValue();
                    break;
                default:
                    break;
            }
        }
        return collected;
    }

    /**
     * Returns the XFA object stored in the AcroForm dictionary of the
     * document's catalog. It could be an array, could be a Stream.
     *
     * @param pdfDocument a PdfDocument instance
     * @return the XFA object, or null if the catalog has no AcroForm dictionary
     * or the dictionary has no XFA entry
     */
    private static PdfObject getXfaObject(PdfDocument pdfDocument) {
        PdfDictionary acroFormDictionary = pdfDocument.getCatalog().getPdfObject().getAsDictionary(PdfName.AcroForm);
        if (acroFormDictionary == null) {
            return null;
        }
        return acroFormDictionary.get(PdfName.XFA);
    }

    /**
     * Returns the XFA object stored in the given AcroForm. It could be an
     * array, could be a Stream.
     *
     * @param acroForm a PdfAcroForm instance, may be null
     * @return the XFA object, or null if the AcroForm or its underlying PDF
     * object is null or no XFA entry is present
     */
    private static PdfObject getXfaObject(PdfAcroForm acroForm) {
        if (acroForm == null || acroForm.getPdfObject() == null) {
            return null;
        }
        return acroForm.getPdfObject().get(PdfName.XFA);
    }

    /**
     * Serializes an XML node to a byte array using an {@link XmlDomWriter}
     * that writes into an in-memory buffer.
     *
     * @param n the XML node to serialize
     * @return the serialized XML
     * @throws java.io.IOException if any I/O issue occurs
     */
    private static byte[] serializeDocument(Node n) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream(INIT_SERIALIZER_BUFFER_SIZE);
        XmlDomWriter domWriter = new XmlDomWriter(false);
        domWriter.setOutput(buffer, null);
        domWriter.write(n);
        buffer.close();
        return buffer.toByteArray();
    }

    /**
     * Initializes this form from an XFA PDF object. For an array, the bytes of
     * every stream found at an odd index are concatenated; for a single
     * stream, its bytes are used as they are. The collected bytes are then
     * parsed as XML.
     *
     * @param xfa the XFA object, either an array or a stream
     * @throws IOException                  if any I/O issue occurs
     * @throws ParserConfigurationException declared for callers; see the parsing overload
     * @throws SAXException                 if the collected XML cannot be parsed
     */
    private void initXfaForm(PdfObject xfa) throws IOException, ParserConfigurationException, SAXException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        if (xfa.isArray()) {
            PdfArray packets = (PdfArray) xfa;
            // Odd positions hold the packet contents; even positions hold their names.
            for (int idx = 1; idx < packets.size(); idx += 2) {
                PdfObject candidate = packets.get(idx);
                if (candidate instanceof PdfStream) {
                    collected.write(((PdfStream) candidate).getBytes());
                }
            }
        } else if (xfa instanceof PdfStream) {
            collected.write(((PdfStream) xfa).getBytes());
        }
        collected.close();
        initXfaForm(new ByteArrayInputStream(collected.toByteArray()));
    }

    /**
     * Parses the stream into a DOM document, installs it as the form's
     * document and marks XFA as present.
     *
     * @param inputStream the stream containing the XFA XML
     * @throws IOException  if any I/O issue occurs
     * @throws SAXException if the XML cannot be parsed
     */
    private void initXfaForm(InputStream inputStream) throws IOException, SAXException {
        Document parsed = XmlProcessorCreator.createSafeDocumentBuilder(true, false).parse(inputStream);
        setDomDocument(parsed);
        xfaPresent = true;
    }

    /**
     * Extracts the packet nodes from the domDocument.
     * <p>
     * Updates the template and datasets nodes from the packets found and
     * rebuilds the datasets SOM over the {@code xfa:data} child of the
     * datasets node, falling back to its first child. When no datasets node is
     * known afterwards, a new one is created.
     */
    private void extractNodes() {
        Map<String, Node> xfaNodes = extractXFANodes(domDocument);

        if (xfaNodes.containsKey("template")) {
            templateNode = xfaNodes.get("template");
        }
        if (xfaNodes.containsKey("datasets")) {
            datasetsNode = xfaNodes.get("datasets");
            Node dataNode = findDataNode(datasetsNode);
            datasetsSom = new Xml2SomDatasets(dataNode != null ? dataNode : datasetsNode.getFirstChild());
            // The search helper is derived from the names of the previous SOM;
            // drop it so it is rebuilt from the new one on next use.
            acroFieldsSom = null;
        }
        if (datasetsNode == null) {
            createDatasetsNode(domDocument.getFirstChild());
        }
    }


    /**
     * Some XFA forms don't have a datasets node.
     * If this is the case, we have to add one.
     * <p>
     * Starting at {@code n}, the first sibling that has children receives a
     * new {@code xfa:datasets} element, which also becomes the form's datasets
     * node. Nothing happens when no such sibling exists.
     *
     * @param n the node at which the search for a parent starts, may be null
     */
    private void createDatasetsNode(Node n) {
        Node host = n;
        while (host != null && host.getChildNodes().getLength() == 0) {
            host = host.getNextSibling();
        }
        if (host == null) {
            return;
        }
        Element created = host.getOwnerDocument().createElement("xfa:datasets");
        created.setAttribute("xmlns:xfa", XFA_DATA_SCHEMA);
        datasetsNode = created;
        host.appendChild(created);
    }

    /**
     * Returns the first direct child of {@code src} that is an element.
     *
     * @param src the node whose children are scanned
     * @return the first element child, or null when there is none
     */
    private Node getFirstElementNode(Node src) {
        for (Node child = src.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                return child;
            }
        }
        return null;
    }

    /**
     * Returns the first direct child of the datasets node whose node name is
     * exactly {@code xfa:data}.
     *
     * @param datasetsNode the datasets node to scan
     * @return the matching child, or null when there is none
     */
    private Node findDataNode(Node datasetsNode) {
        NodeList children = datasetsNode.getChildNodes();
        int count = children.getLength();
        int index = 0;
        while (index < count) {
            Node child = children.item(index);
            if (child.getNodeName().equals("xfa:data")) {
                return child;
            }
            index++;
        }
        return null;
    }
}