org.sejda.impl.sambox.component.AcroFormsMerger Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sejda-sambox Show documentation
Show all versions of sejda-sambox Show documentation
Package containing tasks implemented using sambox.
/*
* Created on 09 set 2015
* Copyright 2015 by Andrea Vacondio ([email protected]).
* This file is part of Sejda.
*
* Sejda is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Sejda is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Sejda. If not, see .
*/
package org.sejda.impl.sambox.component;
import static java.util.Objects.isNull;
import static java.util.Objects.nonNull;
import static java.util.Optional.ofNullable;
import static java.util.stream.Collectors.toList;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.sejda.impl.sambox.component.SignatureClipper.clipSignature;
import static org.sejda.sambox.cos.COSName.A;
import static org.sejda.sambox.cos.COSName.AF;
import static org.sejda.sambox.cos.COSName.AP;
import static org.sejda.sambox.cos.COSName.AS;
import static org.sejda.sambox.cos.COSName.BM;
import static org.sejda.sambox.cos.COSName.BORDER;
import static org.sejda.sambox.cos.COSName.BS;
import static org.sejda.sambox.cos.COSName.C;
import static org.sejda.sambox.cos.COSName.CONTENTS;
import static org.sejda.sambox.cos.COSName.DA;
import static org.sejda.sambox.cos.COSName.DATAPREP;
import static org.sejda.sambox.cos.COSName.DS;
import static org.sejda.sambox.cos.COSName.DV;
import static org.sejda.sambox.cos.COSName.F;
import static org.sejda.sambox.cos.COSName.FF;
import static org.sejda.sambox.cos.COSName.FT;
import static org.sejda.sambox.cos.COSName.H;
import static org.sejda.sambox.cos.COSName.I;
import static org.sejda.sambox.cos.COSName.KIDS;
import static org.sejda.sambox.cos.COSName.LOCK;
import static org.sejda.sambox.cos.COSName.M;
import static org.sejda.sambox.cos.COSName.MAX_LEN;
import static org.sejda.sambox.cos.COSName.MK;
import static org.sejda.sambox.cos.COSName.NM;
import static org.sejda.sambox.cos.COSName.OC;
import static org.sejda.sambox.cos.COSName.OPT;
import static org.sejda.sambox.cos.COSName.P;
import static org.sejda.sambox.cos.COSName.PARENT;
import static org.sejda.sambox.cos.COSName.PMD;
import static org.sejda.sambox.cos.COSName.Q;
import static org.sejda.sambox.cos.COSName.RECT;
import static org.sejda.sambox.cos.COSName.RV;
import static org.sejda.sambox.cos.COSName.STRUCT_PARENT;
import static org.sejda.sambox.cos.COSName.SUBTYPE;
import static org.sejda.sambox.cos.COSName.SV;
import static org.sejda.sambox.cos.COSName.T;
import static org.sejda.sambox.cos.COSName.TI;
import static org.sejda.sambox.cos.COSName.TM;
import static org.sejda.sambox.cos.COSName.TU;
import static org.sejda.sambox.cos.COSName.TYPE;
import static org.sejda.sambox.cos.COSName.V;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.UUID;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import org.sejda.common.LookupTable;
import org.sejda.impl.sambox.util.AcroFormUtils;
import org.sejda.impl.sambox.util.FontUtils;
import org.sejda.model.pdf.form.AcroFormPolicy;
import org.sejda.sambox.cos.COSName;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.font.PDFont;
import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotation;
import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.sejda.sambox.pdmodel.interactive.form.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Component providing methods to merge multiple acroforms together using different strategies.
*
* @author Andrea Vacondio
*/
public class AcroFormsMerger {
private static final Logger LOG = LoggerFactory.getLogger(AcroFormsMerger.class);
private static final COSName[] FIELD_KEYS = { FT, PARENT, KIDS, T, TU, TM, FF, V, DV, DA, Q, DS, RV, OPT, MAX_LEN,
TI, I, LOCK, SV, DATAPREP };
private static final COSName[] WIDGET_KEYS = { TYPE, SUBTYPE, RECT, CONTENTS, P, NM, M, F, AP, AS, BORDER, C,
STRUCT_PARENT, OC, AF, BM, H, MK, A, BS, PMD };
private AcroFormPolicy policy;
private PDAcroForm form;
private String random = Long.toString(UUID.randomUUID().getMostSignificantBits(), 36);
private Long counter = 0L;
private final BiFunction, PDTerminalField> createOrReuseTerminalField = (
PDTerminalField existing, LookupTable fieldsLookup) -> {
PDField previouslyCreated = ofNullable(getMergedField(existing.getFullyQualifiedName()))
.orElseGet(() -> fieldsLookup.lookup(existing));
if (isNull(previouslyCreated)) {
previouslyCreated = PDFieldFactory.createFieldAddingChildToParent(this.form,
existing.getCOSObject().duplicate(),
(PDNonTerminalField) fieldsLookup.lookup(existing.getParent()));
previouslyCreated.getCOSObject().removeItem(COSName.KIDS);
fieldsLookup.addLookupEntry(existing, previouslyCreated);
}
if (!previouslyCreated.isTerminal()) {
LOG.warn("Cannot merge terminal field because a non terminal field with the same name already exists: {}",
existing.getFullyQualifiedName());
return null;
}
return (PDTerminalField) previouslyCreated;
};
private final BiFunction, PDTerminalField> createRenamingTerminalField = (
PDTerminalField existing, LookupTable fieldsLookup) -> {
PDTerminalField newField = (PDTerminalField) PDFieldFactory.createFieldAddingChildToParent(this.form,
existing.getCOSObject().duplicate(), (PDNonTerminalField) fieldsLookup.lookup(existing.getParent()));
if (nonNull(getMergedField(existing.getFullyQualifiedName())) || fieldsLookup.hasLookupFor(existing)) {
newField.setPartialName(String.format("%s%s%d", existing.getPartialName(), random, ++counter));
LOG.info("Existing terminal field renamed from {} to {}", existing.getPartialName(),
newField.getPartialName());
}
newField.getCOSObject().removeItem(COSName.KIDS);
fieldsLookup.addLookupEntry(existing, newField);
return newField;
};
private final BiConsumer> createOrReuseNonTerminalField = (PDField field,
LookupTable fieldsLookup) -> {
if (getMergedField(field.getFullyQualifiedName()) == null && !fieldsLookup.hasLookupFor(field)) {
PDField newField = PDFieldFactory.createFieldAddingChildToParent(this.form,
field.getCOSObject().duplicate(), (PDNonTerminalField) fieldsLookup.lookup(field.getParent()));
newField.getCOSObject().removeItem(COSName.KIDS);
fieldsLookup.addLookupEntry(field, newField);
}
};
private PDField getMergedField(String fullyQualifiedName) {
return ofNullable(fullyQualifiedName).map(form::getField).orElse(null);
}
private final BiConsumer> createRenamingNonTerminalField = (PDField field,
LookupTable fieldsLookup) -> {
PDField newField = PDFieldFactory.createFieldAddingChildToParent(this.form, field.getCOSObject().duplicate(),
(PDNonTerminalField) fieldsLookup.lookup(field.getParent()));
if (getMergedField(field.getFullyQualifiedName()) != null || fieldsLookup.hasLookupFor(field)) {
newField.setPartialName(String.format("%s%s%d", field.getPartialName(), random, ++counter));
LOG.info("Existing non terminal field renamed from {} to {}", field.getPartialName(),
newField.getPartialName());
}
newField.getCOSObject().removeItem(COSName.KIDS);
fieldsLookup.addLookupEntry(field, newField);
};
public AcroFormsMerger(AcroFormPolicy policy, PDDocument destination) {
this.policy = policy;
this.form = new PDAcroForm(destination);
}
/**
* Merge the original form to the current one, considering only fields whose widgets are available in the given lookup table.
*
* @param originalForm
* the form to merge
* @param annotationsLookup
* lookup for relevant annotations
*/
public void mergeForm(PDAcroForm originalForm, LookupTable annotationsLookup) {
if (nonNull(originalForm)) {
if (originalForm.hasXFA()) {
LOG.warn("The AcroForm has XFA resurces which will be ignored");
}
LOG.debug("Merging acroforms with policy {}", policy);
switch (policy) {
case MERGE_RENAMING_EXISTING_FIELDS:
updateForm(originalForm, annotationsLookup, createRenamingTerminalField,
createRenamingNonTerminalField);
break;
case MERGE:
updateForm(originalForm, annotationsLookup, createOrReuseTerminalField, createOrReuseNonTerminalField);
break;
case FLATTEN:
updateForm(originalForm, annotationsLookup, createRenamingTerminalField,
createRenamingNonTerminalField);
flatten();
break;
default:
LOG.debug("Discarding acroform");
}
} else {
LOG.debug("Skipped acroform merge, nothing to merge");
}
}
/**
* For each new widget annotation in the lookup table removes all the Field keys.
*
* @param annotations
*/
private void removeFieldKeysFromWidgets(Collection annotations) {
annotations.stream().map(PDAnnotation::getCOSObject).forEach(a -> a.removeItems(FIELD_KEYS));
LOG.trace("Removed fields keys from widget annotations");
}
private void updateForm(PDAcroForm originalForm, LookupTable annotationsLookup,
BiFunction, PDTerminalField> getTerminalField,
BiConsumer> createNonTerminalField) {
AcroFormUtils.mergeDefaults(originalForm, form);
LookupTable fieldsLookup = new LookupTable<>();
// it must be a pre order visit because we have to process non terminal first otherwise terminal ones won't get a parent
originalForm.getFieldTree().stream().forEach(field -> {
if (!field.isTerminal()) {
createNonTerminalField.accept(field, fieldsLookup);
} else {
List relevantWidgets = findMappedWidgetsFor((PDTerminalField) field,
annotationsLookup);
if (!relevantWidgets.isEmpty()) {
PDTerminalField terminalField = getTerminalField.apply((PDTerminalField) field, fieldsLookup);
if (nonNull(terminalField)) {
removeFieldKeysFromWidgets(relevantWidgets);
for (PDAnnotationWidget widget : relevantWidgets) {
terminalField.addWidgetIfMissing(widget);
}
terminalField.getCOSObject().removeItems(WIDGET_KEYS);
}
} else {
LOG.debug("Discarded not relevant field {}", field.getFullyQualifiedName());
}
}
});
this.form.addFields(originalForm.getFields().stream().map(fieldsLookup::lookup).filter(Objects::nonNull)
.collect(Collectors.toList()));
// let's process those annotations containing merged widget/fields dictionaries and somehow not referenced by originalForm acroform (ex. empty fields array)
annotationsLookup.values().stream().filter(a -> a instanceof PDAnnotationWidget)
.filter(a -> a.getCOSObject().containsKey(COSName.T)).map(a -> (PDAnnotationWidget) a).collect(toList())
.forEach(w -> {
PDField orphanField = PDFieldFactory.createField(originalForm, w.getCOSObject(), null);
if (orphanField instanceof PDTerminalField) {
PDTerminalField newOrphanField = getTerminalField.apply((PDTerminalField) orphanField,
fieldsLookup);
if (nonNull(newOrphanField)) {
w.getCOSObject().removeItems(FIELD_KEYS);
newOrphanField.addWidgetIfMissing(w);
newOrphanField.getCOSObject().removeItems(WIDGET_KEYS);
if (isNull(getMergedField(newOrphanField.getFullyQualifiedName()))) {
this.form.addFields(Arrays.asList(newOrphanField));
}
}
}
});
}
/**
* @param field
* @param annotationsLookup
* @return the list of relevant widgets for the given field.
*/
private List findMappedWidgetsFor(PDTerminalField field,
LookupTable annotationsLookup) {
return field.getWidgets().stream().map(annotationsLookup::lookup).filter(w -> w instanceof PDAnnotationWidget)
.map(w -> (PDAnnotationWidget) w).collect(toList());
}
private void flatten() {
try {
List fields = new ArrayList<>();
for (PDField field : form.getFieldTree()) {
fields.add(field);
if(field instanceof PDVariableText) {
ensureValueCanBeDisplayed((PDVariableText) field);
}
}
form.flatten(fields, true);
} catch (IOException | UnsupportedOperationException ex) {
LOG.warn("Failed to flatten form", ex);
}
}
/**
* Makes sure the string can be displayed using appearances font
*/
private void ensureValueCanBeDisplayed(PDVariableText field) {
String value = field.getValueAsString();
if(!FontUtils.canDisplay(value, field.getAppearanceFont())) {
PDFont fallbackFont = FontUtils.findFontFor(form.getDocument(), value);
field.setAppearanceOverrideFont(fallbackFont);
LOG.debug("Form field can't render (in appearances) it's value '%', will use font % for better support", value, fallbackFont);
}
}
/**
* Performs some cleanup task on the resulting {@link PDAcroForm} and then returns it.
*
* @return
*/
public PDAcroForm getForm() {
for (PDField current : form.getFieldTree()) {
if (!current.isTerminal() && !((PDNonTerminalField) current).hasChildren()) {
LOG.info("Removing non terminal field with no child {}", current.getFullyQualifiedName());
if (nonNull(current.getParent())) {
current.getParent().removeChild(current);
} else {
// it's a root field
form.removeField(current);
}
} else if (clipSignature(current)) {
form.setSignaturesExist(true);
}
}
if (isBlank(form.getDefaultAppearance())) {
form.setDefaultAppearance("/Helv 0 Tf 0 g ");
}
return form;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy