com.itextpdf.forms.xfa.XfaForm Maven / Gradle / Ivy
The newest version!
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.forms.xfa;
import com.itextpdf.commons.utils.FileUtil;
import com.itextpdf.forms.PdfAcroForm;
import com.itextpdf.forms.fields.PdfFormCreator;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.pdf.PdfArray;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfVersion;
import com.itextpdf.kernel.pdf.VersionConforming;
import com.itextpdf.kernel.utils.XmlProcessorCreator;
import com.itextpdf.kernel.xmp.XmlDomWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* Processes XFA forms.
*/
public class XfaForm {
/** Initial capacity (16 KiB) of the byte buffer that {@code serializeDocument} writes into. */
private static final int INIT_SERIALIZER_BUFFER_SIZE = 16 * 1024;
/** Node registered under the "template" key by {@code extractNodes()}; stays unset when there is none. */
private Node templateNode;
/** Name lookup over the datasets content; built by {@code extractNodes()} when a "datasets" node is found. */
private Xml2SomDatasets datasetsSom;
/** The "datasets" node, either found by {@code extractNodes()} or created by {@code createDatasetsNode}. */
private Node datasetsNode;
/** Field-name search helper; created lazily by {@code findFieldName}. */
private AcroFieldsSearch acroFieldsSom;
/** Set to {@code true} only after {@code initXfaForm(InputStream)} has parsed its input successfully. */
private boolean xfaPresent = false;
/** The top level DOM document; assigned by {@code setDomDocument}. */
private org.w3c.dom.Document domDocument;
/**
 * The URI for the XFA Data schema.
 */
public static final String XFA_DATA_SCHEMA = "http://www.xfa.org/schema/xfa-data/1.0/";
/**
 * An empty constructor to build on. Delegates to {@link #XfaForm(InputStream)} with the UTF-8
 * bytes of a fixed string literal.
 *
 * <p>NOTE(review): the literal below is a single space, which is not a well-formed XML document;
 * it looks as if markup was lost from it. Confirm the intended default content.
 */
public XfaForm() {
this(new ByteArrayInputStream(" ".getBytes(StandardCharsets.UTF_8)));
}
/**
 * Creates an XFA form from a stream containing all the XML information.
 *
 * @param inputStream the {@link InputStream} to parse
 * @throws PdfException wrapping any exception raised while the stream is parsed and processed;
 *                      the original exception is kept as the cause
 */
public XfaForm(InputStream inputStream) {
try {
initXfaForm(inputStream);
} catch (Exception e) {
// Any failure (I/O, XML syntax, runtime) is reported uniformly as a PdfException.
throw new PdfException(e.getMessage(), e);
}
}
/**
 * Creates an XFA form from a {@link Document} containing all the XML information.
 *
 * <p>The document is installed through {@link #setDomDocument(Document)}. This constructor does
 * not touch the flag returned by {@link #isXfaPresent()}, which therefore keeps its initial
 * value {@code false}.
 *
 * @param domDocument the document
 */
public XfaForm(Document domDocument) {
setDomDocument(domDocument);
}
/**
 * A constructor from a {@link PdfDictionary}. It is assumed, but not
 * necessary for correct initialization, that the dictionary is actually a
 * {@link PdfAcroForm}. An entry in the dictionary with the {@code XFA}
 * key must contain correct XFA syntax. If the {@code XFA} key is
 * absent, then the constructor essentially does nothing.
 *
 * @param acroFormDictionary the dictionary object to initialize from
 * @throws PdfException wrapping any exception raised while the XFA entry is read and parsed
 */
public XfaForm(PdfDictionary acroFormDictionary) {
    PdfObject xfaEntry = acroFormDictionary.get(PdfName.XFA);
    if (xfaEntry == null) {
        // No XFA entry: leave the form uninitialized.
        return;
    }
    try {
        initXfaForm(xfaEntry);
    } catch (Exception cause) {
        throw new PdfException(cause.getMessage(), cause);
    }
}
/**
 * A constructor from a {@link PdfDocument}. It basically does everything
 * from finding the XFA stream to the XML parsing. When the document has no
 * XFA object, the constructor does nothing.
 *
 * @param pdfDocument the PdfDocument instance
 * @throws PdfException wrapping any exception raised while the XFA object is read and parsed
 */
public XfaForm(PdfDocument pdfDocument) {
    PdfObject xfaEntry = getXfaObject(pdfDocument);
    if (xfaEntry == null) {
        // Nothing to parse: leave the form uninitialized.
        return;
    }
    try {
        initXfaForm(xfaEntry);
    } catch (Exception cause) {
        throw new PdfException(cause.getMessage(), cause);
    }
}
/**
 * Sets the XFA key of the document's AcroForm from the given form. The old XFA is erased.
 * The AcroForm is obtained through {@code PdfFormCreator.getAcroForm(pdfDocument, true)}.
 *
 * @param form        the data
 * @param pdfDocument pdfDocument
 * @throws java.io.IOException if any I/O issue occurs
 */
public static void setXfaForm(XfaForm form, PdfDocument pdfDocument) throws IOException {
    setXfaForm(form, PdfFormCreator.getAcroForm(pdfDocument, true));
}
/**
 * Sets the XFA key of an AcroForm from the given form. The old XFA is erased.
 *
 * <p>If the current XFA entry is an array of name/stream pairs that contains both a
 * "template" and a "datasets" packet, only those two streams are replaced. In every other
 * case the whole entry is replaced by a single stream holding the serialized DOM document
 * of the form.
 *
 * <p>Nothing is written when the version check for deprecated XFA forms (PDF 2.0) reports
 * a violation.
 *
 * @param form     the data
 * @param acroForm an {@link PdfAcroForm} instance
 * @throws java.io.IOException      if any I/O issue occurs
 * @throws IllegalArgumentException if the form, the AcroForm or the AcroForm's document is null
 */
public static void setXfaForm(XfaForm form, PdfAcroForm acroForm) throws IOException {
    if (form == null || acroForm == null || acroForm.getPdfDocument() == null) {
        throw new IllegalArgumentException("XfaForm, PdfAcroForm and PdfAcroForm's document shall not be null");
    }
    PdfDocument document = acroForm.getPdfDocument();
    if (VersionConforming.validatePdfVersionForDeprecatedFeatureLogError(document, PdfVersion.PDF_2_0,
            VersionConforming.DEPRECATED_XFA_FORMS)) {
        return;
    }
    PdfObject xfa = getXfaObject(acroForm);
    if (xfa != null && xfa.isArray()) {
        PdfArray ar = (PdfArray) xfa;
        int t = -1;
        int d = -1;
        // Entries come in (name, stream) pairs. Requiring k + 1 to be a valid index keeps a
        // trailing unpaired name from producing an out-of-range stream position.
        for (int k = 0; k + 1 < ar.size(); k += 2) {
            PdfString packetName = ar.getAsString(k);
            if (packetName == null) {
                // Not a string at a name position: skip the pair instead of failing on null.
                continue;
            }
            String packet = packetName.toString();
            if ("template".equals(packet)) {
                t = k + 1;
            }
            if ("datasets".equals(packet)) {
                d = k + 1;
            }
        }
        if (t > -1 && d > -1) {
            ar.set(t, createXfaStream(document, form.templateNode));
            ar.set(d, createXfaStream(document, form.datasetsNode));
            ar.setModified();
            ar.flush();
            putXfaEntry(acroForm, document, new PdfArray(ar));
            return;
        }
    }
    PdfStream stream = createXfaStream(document, form.domDocument);
    stream.flush();
    putXfaEntry(acroForm, document, stream);
}

/**
 * Serializes a DOM node into a new stream that uses the compression level of the document's writer.
 */
private static PdfStream createXfaStream(PdfDocument document, Node content) throws IOException {
    PdfStream stream = new PdfStream(serializeDocument(content));
    stream.setCompressionLevel(document.getWriter().getCompressionLevel());
    return stream;
}

/**
 * Stores the XFA entry in the AcroForm and marks the AcroForm (and, for a direct AcroForm
 * dictionary, the catalog) as modified.
 */
private static void putXfaEntry(PdfAcroForm acroForm, PdfDocument document, PdfObject xfaEntry) {
    acroForm.put(PdfName.XFA, xfaEntry);
    acroForm.setModified();
    if (!acroForm.getPdfObject().isIndirect()) {
        document.getCatalog().setModified();
    }
}
/**
 * Extracts DOM nodes from an XFA document.
 *
 * <p>The first top-level node of the document that has child nodes is treated as the
 * container; each of its element children is registered under its local name. When several
 * children share a local name, the last one wins.
 *
 * @param domDocument an XFA file as a {@link org.w3c.dom.Document DOM document}
 * @return a {@link Map} of XFA packet names and their associated
 * {@link org.w3c.dom.Node DOM nodes}; empty when no top-level node has children
 */
public static Map<String, Node> extractXFANodes(Document domDocument) {
    Map<String, Node> xfaNodes = new HashMap<>();
    Node container = domDocument.getFirstChild();
    // Skip childless top-level nodes; stop at the end instead of dereferencing a null sibling.
    while (container != null && container.getChildNodes().getLength() == 0) {
        container = container.getNextSibling();
    }
    if (container == null) {
        return xfaNodes;
    }
    for (Node child = container.getFirstChild(); child != null; child = child.getNextSibling()) {
        if (child.getNodeType() == Node.ELEMENT_NODE) {
            xfaNodes.put(child.getLocalName(), child);
        }
    }
    return xfaNodes;
}
/**
 * Write the XfaForm to the provided {@link PdfDocument}.
 * Delegates to {@link #setXfaForm(XfaForm, PdfDocument)} with this form.
 *
 * @param document the PdfDocument to write the XFA Form to
 * @throws IOException if any I/O issue occurs
 */
public void write(PdfDocument document) throws IOException {
setXfaForm(this, document);
}
/**
 * Write the XfaForm to the provided {@link PdfAcroForm}.
 * Delegates to {@link #setXfaForm(XfaForm, PdfAcroForm)} with this form.
 *
 * @param acroForm the PdfAcroForm to write the XFA Form to
 * @throws IOException if any I/O issue occurs
 */
public void write(PdfAcroForm acroForm) throws IOException {
setXfaForm(this, acroForm);
}
/**
 * Changes a field value in the XFA form. Does nothing when no XFA is present or when the
 * field name cannot be resolved. When the resolved field has no node in the datasets yet,
 * a node is inserted for it first.
 *
 * @param name  the name of the field to be changed
 * @param value the new value
 */
public void setXfaFieldValue(String name, String value) {
    if (!isXfaPresent()) {
        return;
    }
    String fullName = findFieldName(name);
    if (fullName == null) {
        return;
    }
    String shortName = Xml2Som.getShortName(fullName);
    Node target = findDatasetsNode(shortName);
    if (target == null) {
        target = datasetsSom.insertNode(getDatasetsNode(), shortName);
    }
    setNodeText(target, value);
}
/**
 * Gets the xfa field value.
 *
 * @param name the fully qualified field name
 * @return the field value, or {@code null} when no XFA is present or the field name
 * cannot be resolved
 */
public String getXfaFieldValue(String name) {
    if (!isXfaPresent()) {
        return null;
    }
    String fullName = findFieldName(name);
    if (fullName == null) {
        return null;
    }
    String shortName = Xml2Som.getShortName(fullName);
    return XfaForm.getNodeText(findDatasetsNode(shortName));
}
/**
 * Returns {@code true} if it is a XFA form.
 *
 * @return {@code true} if it is a XFA form
 */
public boolean isXfaPresent() {
return xfaPresent;
}
/**
 * Finds the complete field name from a partial name. The search helper is created on first
 * use from the names known to the datasets lookup.
 *
 * @param name the complete or partial name
 * @return the complete name or {@code null} if not found
 */
public String findFieldName(String name) {
    if (!xfaPresent) {
        return null;
    }
    if (acroFieldsSom == null && datasetsSom != null) {
        acroFieldsSom = new AcroFieldsSearch(datasetsSom.getName2Node().keySet());
    }
    if (acroFieldsSom == null) {
        return null;
    }
    // A direct short-name hit takes precedence over the inverse search.
    if (acroFieldsSom.getAcroShort2LongName().containsKey(name)) {
        return acroFieldsSom.getAcroShort2LongName().get(name);
    }
    return acroFieldsSom.inverseSearchGlobal(Xml2Som.splitParts(name));
}
/**
 * Finds the complete SOM name contained in the datasets section from a
 * possibly partial name.
 *
 * @param name the complete or partial name
 * @return the complete name or {@code null} if not found, or if this form has no
 * datasets lookup (no "datasets" node was found in the document)
 */
public String findDatasetsName(String name) {
    if (datasetsSom == null) {
        // The lookup only exists once a "datasets" node has been extracted.
        return null;
    }
    if (datasetsSom.getName2Node().containsKey(name)) {
        return name;
    }
    return datasetsSom.inverseSearchGlobal(Xml2Som.splitParts(name));
}
/**
 * Finds the {@code Node} contained in the datasets section from a
 * possibly partial name.
 *
 * @param name the complete or partial name
 * @return the {@code Node} or {@code null} if not found
 */
public Node findDatasetsNode(String name) {
    if (name == null) {
        return null;
    }
    String completeName = findDatasetsName(name);
    return completeName == null ? null : datasetsSom.getName2Node().get(completeName);
}
/**
 * Gets all the text contained in the child nodes of this node.
 *
 * @param n the {@code Node}
 * @return the text found or "" if no text was found (also when the node itself is {@code null})
 */
public static String getNodeText(Node n) {
    if (n == null) {
        return "";
    }
    return getNodeText(n, "");
}
/**
 * Gets all the text contained in the child nodes of the node under the provided path.
 * The lookup is built over the whole DOM document on every call.
 *
 * @param path path to the node to extract text in the format "some.path.to.node"
 *
 * @return text found under the provided path or {@code null} if node or text wasn't found
 */
public String getNodeTextByPath(String path) {
    if (!xfaPresent) {
        return null;
    }
    Xml2SomDatasets documentSom = new Xml2SomDatasets(domDocument);
    AcroFieldsSearch pathSearch = new AcroFieldsSearch(documentSom.getName2Node().keySet());
    String resolvedPath = pathSearch.inverseSearchGlobal(Xml2Som.splitParts(path));
    if (resolvedPath == null) {
        return null;
    }
    Node match = documentSom.getName2Node().get(resolvedPath);
    return XfaForm.getNodeText(match);
}
/**
 * Sets the text of this node. All the child nodes are deleted and a new
 * child text node is created. An attribute named "dataNode" in the
 * {@link #XFA_DATA_SCHEMA} namespace is removed from the node if present.
 *
 * @param n    the {@code Node} to add the text to; a {@code null} node is ignored
 * @param text the text to add
 */
public void setNodeText(Node n, String text) {
    if (n == null) {
        return;
    }
    Node child;
    while ((child = n.getFirstChild()) != null) {
        n.removeChild(child);
    }
    // Only element nodes carry an attribute map; for any other node type it is null.
    org.w3c.dom.NamedNodeMap attributes = n.getAttributes();
    if (attributes != null && attributes.getNamedItemNS(XFA_DATA_SCHEMA, "dataNode") != null) {
        attributes.removeNamedItemNS(XFA_DATA_SCHEMA, "dataNode");
    }
    n.appendChild(domDocument.createTextNode(text));
}
/**
 * Gets the top level DOM document.
 *
 * @return the top level DOM document, as last passed to {@link #setDomDocument(Document)}
 */
public Document getDomDocument() {
return domDocument;
}
/**
 * Sets the top DOM document and re-extracts the nodes of interest from it
 * (see {@code extractNodes()}).
 *
 * @param domDocument the top DOM document
 */
public void setDomDocument(org.w3c.dom.Document domDocument) {
this.domDocument = domDocument;
// Refresh the template/datasets references so they point into the new document.
extractNodes();
}
/**
 * Gets the {@code Node} that corresponds to the datasets part.
 *
 * @return the {@code Node} that corresponds to the datasets part
 */
public Node getDatasetsNode() {
return datasetsNode;
}
/**
 * Replaces the XFA data under datasets/data. Accepts a {@link File file
 * object} to fill this object with XFA data. The resulting DOM document may
 * be modified. Equivalent to {@code fillXfaForm(file, false)}.
 *
 * @param file the {@link File}
 * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
 */
public void fillXfaForm(File file) throws IOException {
fillXfaForm(file, false);
}
/**
 * Replaces the XFA data under datasets/data. Accepts a {@link File file
 * object} to fill this object with XFA data. The stream opened on the file
 * is closed before this method returns, whether or not filling succeeds.
 *
 * @param file     the {@link File}
 * @param readOnly whether or not the resulting DOM document may be modified
 * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
 */
public void fillXfaForm(File file, boolean readOnly) throws IOException {
    // This method opens the stream, so it is also responsible for closing it.
    try (InputStream is = FileUtil.getInputStreamForFile(file)) {
        fillXfaForm(is, readOnly);
    }
}
/**
 * Replaces the XFA data under datasets/data. Accepts an {@link InputStream}
 * to fill this object with XFA data. The resulting DOM document may be
 * modified. Equivalent to {@code fillXfaForm(is, false)}.
 *
 * @param is the {@link InputStream}
 * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
 */
public void fillXfaForm(InputStream is) throws IOException {
fillXfaForm(is, false);
}
/**
 * Replaces the XFA data under datasets/data. Accepts an {@link InputStream}
 * to fill this object with XFA data. The stream is wrapped in an
 * {@link InputSource}; this method does not close it itself.
 *
 * @param is the {@link InputStream}
 * @param readOnly whether or not the resulting DOM document may be modified
 * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
 */
public void fillXfaForm(InputStream is, boolean readOnly) throws IOException {
fillXfaForm(new InputSource(is), readOnly);
}
/**
 * Replaces the XFA data under datasets/data. Accepts a {@link InputSource
 * SAX input source} to fill this object with XFA data. The resulting DOM
 * document may be modified. Equivalent to {@code fillXfaForm(is, false)}.
 *
 * @param is the {@link InputSource SAX input source}
 * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
 */
public void fillXfaForm(InputSource is) throws IOException {
fillXfaForm(is, false);
}
/**
 * Replaces the XFA data under datasets/data. Accepts a {@link InputSource
 * SAX input source} to fill this object with XFA data. The source is parsed
 * into a DOM document whose document element becomes the new data.
 *
 * @param is       the {@link InputSource SAX input source}
 * @param readOnly whether or not the resulting DOM document may be modified
 * @throws java.io.IOException if any I/O issue occurs on the {@link InputSource}
 * @throws PdfException        wrapping a {@link SAXException} raised while parsing
 */
public void fillXfaForm(InputSource is, boolean readOnly) throws IOException {
    Document parsedData;
    try {
        parsedData = XmlProcessorCreator.createSafeDocumentBuilder(false, false).parse(is);
    } catch (SAXException parseFailure) {
        throw new PdfException(parseFailure.getMessage(), parseFailure);
    }
    fillXfaForm(parsedData.getDocumentElement(), readOnly);
}
/**
 * Replaces the XFA data under datasets/data. The resulting DOM document may
 * be modified. Equivalent to {@code fillXfaForm(node, false)}.
 *
 * @param node the input {@link org.w3c.dom.Node}
 */
public void fillXfaForm(Node node) {
fillXfaForm(node, false);
}
/**
 * Replaces the XFA data under datasets/data.
 *
 * <p>The data element is the element child of the datasets node with local name "data" in
 * the {@link #XFA_DATA_SCHEMA} namespace; it is created when missing. A deep copy of
 * {@code node} replaces the first element child of the data element, or is appended when
 * the data element has no element child (for instance when it is empty or only holds
 * whitespace text). Afterwards the nodes of interest are re-extracted from the document.
 *
 * @param node     the input {@link org.w3c.dom.Node}
 * @param readOnly whether or not the resulting DOM document may be modified; when
 *                 {@code true}, every {@code field} element of the document gets the
 *                 attribute {@code access="readOnly"}
 */
public void fillXfaForm(Node node, boolean readOnly) {
    if (readOnly) {
        NodeList fields = domDocument.getElementsByTagName("field");
        for (int i = 0; i < fields.getLength(); i++) {
            ((Element) fields.item(i)).setAttribute("access", "readOnly");
        }
    }
    Node data = null;
    NodeList datasetsChildren = datasetsNode.getChildNodes();
    for (int k = 0; k < datasetsChildren.getLength(); ++k) {
        Node candidate = datasetsChildren.item(k);
        // Constant-first comparison: the local name is null for nodes created without a namespace.
        if (candidate.getNodeType() == Node.ELEMENT_NODE
                && "data".equals(candidate.getLocalName())
                && XFA_DATA_SCHEMA.equals(candidate.getNamespaceURI())) {
            data = candidate;
            break;
        }
    }
    if (data == null) {
        data = datasetsNode.getOwnerDocument().createElementNS(XFA_DATA_SCHEMA, "xfa:data");
        datasetsNode.appendChild(data);
    }
    Node imported = domDocument.importNode(node, true);
    // The first child may be plain text rather than an element, so look for the first element.
    Node firstElement = getFirstElementNode(data);
    if (firstElement == null) {
        // No element to replace (empty or text-only data): append instead of dropping the data.
        data.appendChild(imported);
    } else {
        data.replaceChild(imported, firstElement);
    }
    extractNodes();
}
/**
 * Appends, in document order, the values of all text nodes found below {@code n} to
 * {@code name}, descending into element children. Other node types are skipped.
 *
 * @param n    the node whose descendants are scanned
 * @param name the text collected so far
 * @return {@code name} followed by the text found
 */
private static String getNodeText(Node n, String name) {
    String collected = name;
    for (Node child = n.getFirstChild(); child != null; child = child.getNextSibling()) {
        short type = child.getNodeType();
        if (type == Node.TEXT_NODE) {
            collected += child.getNodeValue();
        } else if (type == Node.ELEMENT_NODE) {
            collected = getNodeText(child, collected);
        }
    }
    return collected;
}
/**
 * Return the XFA Object, could be an array, could be a Stream.
 * Returns null if no XFA Object is present, which includes the case where
 * the catalog has no AcroForm dictionary.
 *
 * @param pdfDocument a PdfDocument instance
 * @return the XFA object
 */
private static PdfObject getXfaObject(PdfDocument pdfDocument) {
    PdfDictionary acroFormDictionary =
            pdfDocument.getCatalog().getPdfObject().getAsDictionary(PdfName.AcroForm);
    if (acroFormDictionary == null) {
        return null;
    }
    return acroFormDictionary.get(PdfName.XFA);
}
/**
 * Return the XFA Object, could be an array, could be a Stream.
 * Returns null if no XFA Object is present, or if the AcroForm or its
 * underlying dictionary is null.
 *
 * @param acroForm a PdfAcroForm instance
 * @return the XFA object
 */
private static PdfObject getXfaObject(PdfAcroForm acroForm) {
    if (acroForm == null || acroForm.getPdfObject() == null) {
        return null;
    }
    return acroForm.getPdfObject().get(PdfName.XFA);
}
/**
 * Serializes a XML document to a byte array, using an in-memory buffer with an initial
 * capacity of {@code INIT_SERIALIZER_BUFFER_SIZE} bytes.
 *
 * @param n the XML document
 * @return the serialized XML document
 * @throws java.io.IOException if any I/O issue occurs
 */
private static byte[] serializeDocument(Node n) throws IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream(INIT_SERIALIZER_BUFFER_SIZE);
    XmlDomWriter domWriter = new XmlDomWriter(false);
    domWriter.setOutput(buffer, null);
    domWriter.write(n);
    buffer.close();
    return buffer.toByteArray();
}
/**
 * Initializes this form from an XFA object. For an array, the bytes of every stream found
 * at an odd index are concatenated in order; for a single stream, its bytes are used as is.
 * The collected bytes are then parsed by {@code initXfaForm(InputStream)}.
 *
 * @param xfa the XFA object, an array or a stream
 */
private void initXfaForm(PdfObject xfa) throws IOException, ParserConfigurationException, SAXException {
    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    if (xfa.isArray()) {
        PdfArray packets = (PdfArray) xfa;
        // Odd positions hold the packet content; entries that are not streams are skipped.
        for (int index = 1; index < packets.size(); index += 2) {
            PdfObject entry = packets.get(index);
            if (entry instanceof PdfStream) {
                collected.write(((PdfStream) entry).getBytes());
            }
        }
    } else if (xfa instanceof PdfStream) {
        collected.write(((PdfStream) xfa).getBytes());
    }
    collected.close();
    initXfaForm(new ByteArrayInputStream(collected.toByteArray()));
}
/**
 * Parses the stream into a DOM document, installs it through {@code setDomDocument} and
 * then marks XFA as present.
 *
 * @param inputStream the stream holding the XML to parse
 */
private void initXfaForm(InputStream inputStream) throws IOException, SAXException {
    Document parsed = XmlProcessorCreator.createSafeDocumentBuilder(true, false).parse(inputStream);
    setDomDocument(parsed);
    // Only reached when parsing and node extraction did not throw.
    xfaPresent = true;
}
/**
 * Extracts the nodes from the domDocument.
 *
 * <p>Picks up the "template" and "datasets" nodes when present. For a datasets node, the
 * name lookup is rebuilt from its {@code xfa:data} child, or from its first child when there
 * is no such child. When no datasets node is known afterwards, one is created.
 */
private void extractNodes() {
    // Typed map: the values are assigned to Node fields below.
    Map<String, Node> xfaNodes = extractXFANodes(domDocument);
    if (xfaNodes.containsKey("template")) {
        templateNode = xfaNodes.get("template");
    }
    if (xfaNodes.containsKey("datasets")) {
        datasetsNode = xfaNodes.get("datasets");
        Node dataNode = findDataNode(datasetsNode);
        datasetsSom = new Xml2SomDatasets(dataNode != null ? dataNode : datasetsNode.getFirstChild());
    }
    if (datasetsNode == null) {
        createDatasetsNode(domDocument.getFirstChild());
    }
}
/**
 * Some XFA forms don't have a datasets node.
 * If this is the case, we have to add one.
 *
 * <p>Starting at {@code n}, the first sibling that has child nodes receives a new
 * {@code xfa:datasets} element, which also becomes the datasets node of this form.
 * Nothing happens when no such sibling exists.
 */
private void createDatasetsNode(Node n) {
    Node host = n;
    while (host != null && host.getChildNodes().getLength() == 0) {
        host = host.getNextSibling();
    }
    if (host == null) {
        return;
    }
    Element created = host.getOwnerDocument().createElement("xfa:datasets");
    created.setAttribute("xmlns:xfa", XFA_DATA_SCHEMA);
    datasetsNode = created;
    host.appendChild(created);
}
/**
 * Returns the first child of {@code src} that is an element node.
 *
 * @param src the parent node
 * @return the first element child, or {@code null} when there is none
 */
private Node getFirstElementNode(Node src) {
    for (Node child = src.getFirstChild(); child != null; child = child.getNextSibling()) {
        if (child.getNodeType() == Node.ELEMENT_NODE) {
            return child;
        }
    }
    return null;
}
/**
 * Returns the first child of the given node whose node name is exactly {@code xfa:data}.
 *
 * @param datasetsNode the node whose children are searched
 * @return the matching child, or {@code null} when there is none
 */
private Node findDataNode(Node datasetsNode) {
    for (Node child = datasetsNode.getFirstChild(); child != null; child = child.getNextSibling()) {
        if ("xfa:data".equals(child.getNodeName())) {
            return child;
        }
    }
    return null;
}
}