// org.sejda.impl.itext.component.PdfUnpacker Maven / Gradle / Ivy
/*
* Created on 22/ago/2011
* Copyright 2011 by Andrea Vacondio ([email protected]).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sejda.impl.itext.component;
import static org.sejda.core.support.io.IOUtils.createTemporaryBuffer;
import static org.sejda.core.support.io.model.FileOutput.file;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.sejda.common.collection.NullSafeSet;
import org.sejda.core.support.io.MultipleOutputWriter;
import org.sejda.core.support.io.OutputWriters;
import org.sejda.model.exception.TaskException;
import org.sejda.model.exception.TaskIOException;
import org.sejda.model.output.MultipleTaskOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.lowagie.text.pdf.PRStream;
import com.lowagie.text.pdf.PdfArray;
import com.lowagie.text.pdf.PdfDictionary;
import com.lowagie.text.pdf.PdfName;
import com.lowagie.text.pdf.PdfNameTree;
import com.lowagie.text.pdf.PdfObject;
import com.lowagie.text.pdf.PdfReader;
import com.lowagie.text.pdf.PdfString;
/**
* Component responsible for unpacking attachments attached to multiple pdf documents.
*
* @author Andrea Vacondio
*
*/
public class PdfUnpacker {
private static final Logger LOG = LoggerFactory.getLogger(PdfUnpacker.class);
private MultipleOutputWriter outputWriter;
public PdfUnpacker(boolean overwrite) {
outputWriter = OutputWriters.newMultipleOutputWriter(overwrite);
}
public void unpack(PdfReader reader) throws TaskException {
if (reader == null) {
throw new TaskException("Unable to unpack a null reader.");
}
LOG.debug("Unpacking started");
Set dictionaries = getAttachmentsDictionaries(reader);
if (dictionaries.isEmpty()) {
LOG.info("No attachments found.");
} else {
unpack(dictionaries);
}
}
private void unpack(Set dictionaries) throws TaskIOException {
for (PdfDictionary dictionary : dictionaries) {
PdfName type = (PdfName) PdfReader.getPdfObject(dictionary.get(PdfName.TYPE));
if (PdfName.F.equals(type) || PdfName.FILESPEC.equals(type)) {
PdfDictionary ef = (PdfDictionary) PdfReader.getPdfObject(dictionary.get(PdfName.EF));
PdfString fn = (PdfString) PdfReader.getPdfObject(dictionary.get(PdfName.F));
if (fn != null && ef != null) {
PRStream prs = (PRStream) PdfReader.getPdfObject(ef.get(PdfName.F));
if (prs != null) {
File tmpFile = copyToTemporaryFile(prs);
outputWriter.addOutput(file(tmpFile).name(fn.toUnicodeString()));
}
}
}
}
}
private File copyToTemporaryFile(PRStream prs) throws TaskIOException {
File tmpFile = createTemporaryBuffer();
LOG.debug("Created output temporary buffer {}", tmpFile);
ByteArrayInputStream inputStream = null;
try {
inputStream = new ByteArrayInputStream(PdfReader.getStreamBytes(prs));
FileUtils.copyInputStreamToFile(inputStream, tmpFile);
LOG.debug("Attachment unpacked to temporary buffer");
} catch (IOException e) {
throw new TaskIOException("Unable to copy attachment to temporary file.", e);
} finally {
IOUtils.closeQuietly(inputStream);
}
return tmpFile;
}
/**
* writes to the output
*
* @param output
* @throws TaskIOException
*/
public void write(MultipleTaskOutput> output) throws TaskException {
output.accept(outputWriter);
}
private Set getAttachmentsDictionaries(PdfReader reader) {
Set retSet = new NullSafeSet();
retSet.addAll(getEmbeddedFilesDictionaries(reader));
retSet.addAll(getFileAttachmentsDictionaries(reader));
return retSet;
}
private Set getEmbeddedFilesDictionaries(PdfReader reader) {
Set retSet = new NullSafeSet();
PdfDictionary catalog = reader.getCatalog();
PdfDictionary names = (PdfDictionary) PdfReader.getPdfObject(catalog.get(PdfName.NAMES));
if (names != null) {
PdfDictionary embFiles = (PdfDictionary) PdfReader.getPdfObject(names.get(PdfName.EMBEDDEDFILES));
if (embFiles != null) {
@SuppressWarnings("unchecked")
HashMap embMap = PdfNameTree.readTree(embFiles);
for (PdfObject value : embMap.values()) {
retSet.add((PdfDictionary) PdfReader.getPdfObject(value));
}
}
}
return retSet;
}
private Set getFileAttachmentsDictionaries(PdfReader reader) {
Set retSet = new NullSafeSet();
for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
PdfArray annots = (PdfArray) PdfReader.getPdfObject(reader.getPageN(k).get(PdfName.ANNOTS));
if (annots != null) {
for (@SuppressWarnings("unchecked")
Iterator iter = annots.listIterator(); iter.hasNext();) {
PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject(iter.next());
PdfName subType = (PdfName) PdfReader.getPdfObject(annot.get(PdfName.SUBTYPE));
if (PdfName.FILEATTACHMENT.equals(subType)) {
retSet.add((PdfDictionary) PdfReader.getPdfObject(annot.get(PdfName.FS)));
}
}
}
}
return retSet;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy