org.sejda.impl.sambox.component.PagesExtractor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sejda-sambox Show documentation
Show all versions of sejda-sambox Show documentation
Package containing tasks implemented using sambox.
/*
* This file is part of the Sejda source code
* Copyright 2015 by Andrea Vacondio ([email protected]).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.sejda.impl.sambox.component;
import static java.util.Optional.ofNullable;
import static org.sejda.common.ComponentsUtility.nullSafeCloseQuietly;
import static org.sejda.core.notification.dsl.ApplicationEventsNotifier.notifyEvent;
import static org.sejda.impl.sambox.component.SignatureClipper.clipSignatures;
import java.io.Closeable;
import java.io.File;
import java.util.Objects;
import java.util.Set;
import org.sejda.common.LookupTable;
import org.sejda.impl.sambox.component.optimization.ResourceDictionaryCleaner;
import org.sejda.impl.sambox.component.optimization.ResourcesHitter;
import org.sejda.model.exception.TaskCancelledException;
import org.sejda.model.exception.TaskException;
import org.sejda.model.exception.TaskExecutionException;
import org.sejda.model.pdf.PdfVersion;
import org.sejda.model.pdf.form.AcroFormPolicy;
import org.sejda.model.task.TaskExecutionContext;
import org.sejda.sambox.cos.COSDictionary;
import org.sejda.sambox.cos.COSName;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.PDPage;
import org.sejda.sambox.pdmodel.PDResources;
import org.sejda.sambox.pdmodel.PageNotFoundException;
import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotation;
import org.sejda.sambox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Component that imports selected pages of an existing {@link PDDocument} into a new destination document and saves
 * it. At save time an outline covering the retained pages can be generated, the annotations relevant to the retained
 * pages are kept and the AcroForm of the original document is merged into the new one.
 *
 * @author Andrea Vacondio
 *
 */
public class PagesExtractor implements Closeable {

    private static final Logger LOG = LoggerFactory.getLogger(PagesExtractor.class);

    private OutlineDistiller outlineMerger;
    private AcroFormsMerger acroFormsMerger;
    private final PDDocument origin;
    private PDDocumentHandler destinationDocument;
    // maps every page of the original document to the corresponding page imported in the destination document
    private final LookupTable<PDPage> pagesLookup = new LookupTable<>();

    /**
     * Creates an extractor reading pages from the given document.
     *
     * @param origin
     *            the document pages are extracted from
     */
    public PagesExtractor(PDDocument origin) {
        this.origin = origin;
        init();
    }

    /**
     * Creates the collaborators bound to a fresh destination document. Called on construction and on
     * {@link #reset()}.
     */
    private void init() {
        this.outlineMerger = new OutlineDistiller(origin);
        this.destinationDocument = new PDDocumentHandler();
        this.destinationDocument.initialiseBasedOn(origin);
        this.acroFormsMerger = new AcroFormsMerger(AcroFormPolicy.MERGE,
                this.destinationDocument.getUnderlyingPDDocument());
    }

    /**
     * Retains every page of the given set, checking for task cancellation before each page and notifying progress
     * after each page.
     *
     * @param pages
     *            the page numbers to retain, in the same numbering used by {@link #retain(int, TaskExecutionContext)}
     * @param executionContext
     *            context of the running task
     * @throws TaskCancelledException
     *             if the task has been cancelled
     * @throws TaskExecutionException
     *             if a page cannot be retained
     */
    public void retain(Set<Integer> pages, TaskExecutionContext executionContext)
            throws TaskCancelledException, TaskExecutionException {
        int totalSteps = pages.size();
        int currentStep = 0;
        for (Integer page : pages) {
            executionContext.assertTaskNotCancelled();
            retain(page, executionContext);
            notifyEvent(executionContext.notifiableTaskMetadata()).stepsCompleted(++currentStep).outOf(totalSteps);
        }
    }

    /**
     * Imports a single page of the original document into the destination document.
     *
     * @param page
     *            the page number, 1 based
     * @param executionContext
     *            context of the running task
     * @throws TaskExecutionException
     *             presumably raised by {@code assertTaskIsLenient} when the page is not found and the task is not
     *             lenient (behavior defined by {@link TaskExecutionContext}, not visible here)
     */
    public void retain(int page, TaskExecutionContext executionContext) throws TaskExecutionException {
        try {
            // page numbers are 1 based while the document pages are accessed by a 0 based index
            PDPage existingPage = origin.getPage(page - 1);
            pagesLookup.addLookupEntry(existingPage, destinationDocument.importPage(existingPage));
            LOG.trace("Imported page number {}", page);
        } catch (PageNotFoundException e) {
            executionContext.assertTaskIsLenient(e);
            // the page is skipped and the user is warned
            notifyEvent(executionContext.notifiableTaskMetadata())
                    .taskWarning(String.format("Page %d was skipped, could not be processed", page), e);
        }
    }

    /**
     * @param version
     *            the PDF version to set on the destination document
     */
    public void setVersion(PdfVersion version) {
        destinationDocument.setVersionOnPDDocument(version);
    }

    /**
     * @param compress
     *            the compression flag forwarded to the destination document handler
     */
    public void setCompress(boolean compress) {
        destinationDocument.setCompress(compress);
    }

    /**
     * Gives every page in the lookup its own copy of the resource dictionary and of the XObject and Font name
     * dictionaries, passes each page to a {@link ResourcesHitter} and finally runs a
     * {@link ResourceDictionaryCleaner} on the destination document.
     */
    public void optimize() {
        LOG.trace("Optimizing document");
        ResourcesHitter hitter = new ResourcesHitter();
        pagesLookup.values().forEach(p -> {
            // each page must have its own resource dic and its own xobject and font name dic
            // so we don't optimize shared resource dic or xobjects/fonts name dictionaries
            COSDictionary resources = ofNullable(p.getResources().getCOSObject()).map(COSDictionary::duplicate)
                    .orElseGet(COSDictionary::new);
            // resources are cached in the PDPage so make sure they are replaced
            p.setResources(new PDResources(resources));
            duplicateDictionaryEntry(resources, COSName.XOBJECT);
            duplicateDictionaryEntry(resources, COSName.FONT);
            hitter.accept(p);
        });
        new ResourceDictionaryCleaner().accept(destinationDocument.getUnderlyingPDDocument());
    }

    /**
     * Replaces the dictionary stored under the given key, if any, with a duplicate of it.
     */
    private static void duplicateDictionaryEntry(COSDictionary resources, COSName key) {
        ofNullable(resources.getDictionaryObject(key, COSDictionary.class)).map(COSDictionary::duplicate)
                .ifPresent(copy -> resources.setItem(key, copy));
    }

    /**
     * Saves the destination document to the given file. Before saving, the outline is generated (unless discarded),
     * the annotations relevant to the retained pages are kept, signatures are clipped and the AcroForm of the
     * original document is merged; the resulting form is set on the destination only if it has at least one field.
     *
     * @param file
     *            the file the destination document is written to
     * @param discardOutline
     *            true if no outline should be generated for the destination document
     * @throws TaskException
     *             if saving the document fails
     */
    public void save(File file, boolean discardOutline) throws TaskException {
        if (!discardOutline) {
            createOutline();
        }
        LookupTable<PDAnnotation> annotations = new AnnotationsDistiller(origin)
                .retainRelevantAnnotations(pagesLookup);
        clipSignatures(annotations.values());
        acroFormsMerger.mergeForm(origin.getDocumentCatalog().getAcroForm(), annotations);
        ofNullable(acroFormsMerger.getForm()).filter(f -> !f.getFields().isEmpty()).ifPresent(f -> {
            LOG.debug("Adding generated AcroForm");
            destinationDocument.setDocumentAcroForm(f);
        });
        destinationDocument.savePDDocument(file);
    }

    /**
     * Builds an outline for the retained pages and sets it on the destination document only if it has children.
     */
    private void createOutline() {
        PDDocumentOutline outline = new PDDocumentOutline();
        outlineMerger.appendRelevantOutlineTo(outline, pagesLookup);
        if (outline.hasChildren()) {
            destinationDocument.setDocumentOutline(outline);
        }
    }

    /**
     * Quietly closes the destination document, clears the pages lookup and drops the outline distiller. The original
     * document is not closed here.
     */
    @Override
    public void close() {
        nullSafeCloseQuietly(destinationDocument);
        pagesLookup.clear();
        outlineMerger = null;
    }

    /**
     * @return the handler of the destination document
     */
    protected PDDocumentHandler destinationDocument() {
        return destinationDocument;
    }

    /**
     * Resets the component making it ready to start a new extraction from the original document
     */
    public void reset() {
        close();
        init();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy