// org.sejda.impl.sambox.component.split.AbstractPdfSplitter Maven / Gradle / Ivy
/*
* Copyright 2015 by Edi Weissmann ([email protected])
*
* This file is part of the Sejda source code
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.sejda.impl.sambox.component.split;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.sejda.core.notification.dsl.ApplicationEventsNotifier.notifyEvent;
import static org.sejda.core.support.io.IOUtils.createTemporaryBuffer;
import static org.sejda.core.support.io.model.FileOutput.file;
import static org.sejda.core.support.prefix.NameGenerator.nameGenerator;
import static org.sejda.core.support.prefix.model.NameGenerationRequest.nameRequest;
import java.io.File;
import org.sejda.core.support.io.MultipleOutputWriter;
import org.sejda.core.support.io.OutputWriters;
import org.sejda.core.support.prefix.NameGenerator;
import org.sejda.core.support.prefix.model.NameGenerationRequest;
import org.sejda.core.support.util.HumanReadableSize;
import org.sejda.impl.sambox.component.PagesExtractor;
import org.sejda.model.exception.TaskException;
import org.sejda.model.input.PdfSource;
import org.sejda.model.parameter.base.MultiplePdfSourceMultipleOutputParameters;
import org.sejda.model.split.NextOutputStrategy;
import org.sejda.model.task.TaskExecutionContext;
import org.sejda.sambox.pdmodel.PDDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Abstract component providing a skeletal implementation of the split execution.
*
* @author Andrea Vacondio
* @param
* the type of parameters the splitter needs to have all the information necessary to perform the split.
*/
public abstract class AbstractPdfSplitter {
private static final Logger LOG = LoggerFactory.getLogger(AbstractPdfSplitter.class);
private PDDocument document;
private T parameters;
private int totalPages;
private MultipleOutputWriter outputWriter;
private boolean optimize = false;
private boolean discardOutline = false;
public AbstractPdfSplitter(PDDocument document, T parameters, boolean optimize, boolean discardOutline) {
this.document = document;
this.parameters = parameters;
this.totalPages = document.getNumberOfPages();
this.optimize = optimize;
this.discardOutline = discardOutline;
}
public void split(TaskExecutionContext executionContext, String outputPrefix, PdfSource> source)
throws TaskException {
nextOutputStrategy().ensureIsValid();
this.outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(),
executionContext);
NameGenerator nameGen = nameGenerator(outputPrefix);
try (PagesExtractor extractor = supplyPagesExtractor(document)) {
File tmpFile = null;
for (int page = 1; page <= totalPages; page++) {
if (nextOutputStrategy().isOpening(page)) {
LOG.debug("Starting split at page {} of the original document", page);
onOpen(page);
tmpFile = createTemporaryBuffer(parameters.getOutput());
LOG.debug("Created output temporary buffer {}", tmpFile);
int fileNumber = executionContext.incrementAndGetOutputDocumentsCounter();
String outName = parameters.getSpecificResultFilename(fileNumber);
if (isBlank(outName)) {
outName = nameGen.generate(enrichNameGenerationRequest(nameRequest().page(page)
.originalName(source.getName())
.fileNumber(fileNumber)));
}
outputWriter.addOutput(file(tmpFile).name(outName));
}
LOG.trace("Retaining page {} of the original document", page);
onRetain(page);
extractor.retain(page, executionContext);
notifyEvent(executionContext.notifiableTaskMetadata()).stepsCompleted(page)
.outOf(totalPages);
if (nextOutputStrategy().isClosing(page) || page == totalPages) {
onClose(page);
extractor.setVersion(parameters.getVersion());
extractor.setCompress(parameters.isCompress());
if (optimize) {
extractor.optimize();
}
extractor.save(tmpFile, discardOutline, parameters.getOutput()
.getEncryptionAtRestPolicy());
extractor.reset();
LOG.debug("Ending split at page {} of the original document, generated document size is {}", page,
HumanReadableSize.toString(tmpFile.length()));
}
}
}
parameters.getOutput()
.accept(outputWriter);
}
public abstract NameGenerationRequest enrichNameGenerationRequest(NameGenerationRequest request);
/**
* @return the strategy to use to know if it's time to open a new document or close the current one.
*/
public abstract NextOutputStrategy nextOutputStrategy();
/**
* Called when an output document is going to be opened. Extending classes can plug some logic here.
*
* @param page
* the page number which is going to be added
*/
protected void onOpen(int page) throws TaskException {
// nothing
}
/**
* Called when the given page is going to be added . Extending classes can plug some logic here.
*
* @param page
* the page number which is going to be added
*/
protected void onRetain(int page) throws TaskException {
// nothing
}
/**
* Called when an output document is going to be closed. Extending classes can plug some logic here.
*
* @param page
* the last added page number
*/
protected void onClose(int page) throws TaskException {
// nothing
}
/**
* Creates the {@link PagesExtractor} to be used by this {@link AbstractPdfSplitter}
*/
protected PagesExtractor supplyPagesExtractor(PDDocument document) {
return new PagesExtractor(document);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy