org.apache.pdfbox.pdmodel.PDDocument Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pdfbox Show documentation
Show all versions of pdfbox Show documentation
The Apache PDFBox library is an open source Java tool for working with PDF documents.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.multipdf.PDFCloneUtility;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdfwriter.COSWriter;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandlerFactory;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;
/**
* This is the in-memory representation of the PDF document.
* The {@link #close()} method must be called once the document is no longer needed.
*
* @author Ben Litchfield
*/
public class PDDocument implements Closeable
{
private static final Log LOG = LogFactory.getLog(PDDocument.class);

// the low-level COS document wrapped by this object
private final COSDocument document;

// cached values
private PDDocumentInformation documentInformation;
private PDDocumentCatalog documentCatalog;

// the encryption will be cached here. When the document is decrypted then
// the COSDocument will not have an "Encrypt" dictionary anymore and this object must be used
private PDEncryption encryption;

// holds a flag which tells us if we should remove all security from this document.
private boolean allSecurityToBeRemoved;

// keeps track of a customized documentId for the trailer. If null, a new id will be generated.
// This ID doesn't represent the actual documentId from the trailer.
private Long documentId;

// the pdf to be read; null when the document was not loaded from a source
private final RandomAccessRead pdfSource;

// the access permissions of the document
private AccessPermission accessPermission;

// fonts to subset before saving (typed so that the fonts can be iterated without casts)
private final Set<PDFont> fontsToSubset = new HashSet<PDFont>();

// Signature interface
private SignatureInterface signInterface;

// document-wide cached resources
private ResourceCache resourceCache = new DefaultResourceCache();
/**
* Creates an empty PDF document that buffers its streams in main memory only.
* You need to add at least one page for the document to be valid.
*/
public PDDocument()
{
// delegates to the MemoryUsageSetting constructor with the main-memory-only setting
this(MemoryUsageSetting.setupMainMemoryOnly());
}
/**
 * Creates an empty PDF document.
 * You need to add at least one page for the document to be valid.
 *
 * <p>If the scratch file cannot be initialized with the given setting, a warning is logged and
 * a main-memory-only scratch file is tried instead. A failure of that fallback is logged as
 * well instead of being dropped silently.
 *
 * @param memUsageSetting defines how memory is used for buffering PDF streams
 */
public PDDocument(MemoryUsageSetting memUsageSetting)
{
    ScratchFile scratchFile = null;
    try
    {
        scratchFile = new ScratchFile(memUsageSetting);
    }
    catch (IOException ioe)
    {
        LOG.warn("Error initializing scratch file: " + ioe.getMessage() +
                ". Fall back to main memory usage only.");
        try
        {
            scratchFile = new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly());
        }
        catch (IOException fallbackFailure)
        {
            // Not rethrown, so the constructor keeps its signature; logged so that a
            // missing scratch file can be diagnosed.
            LOG.error("Error initializing main memory scratch file", fallbackFailure);
        }
    }
    document = new COSDocument(scratchFile);
    pdfSource = null;

    // First we need a trailer
    COSDictionary trailer = new COSDictionary();
    document.setTrailer(trailer);

    // Next we need the root dictionary.
    COSDictionary rootDictionary = new COSDictionary();
    trailer.setItem(COSName.ROOT, rootDictionary);
    rootDictionary.setItem(COSName.TYPE, COSName.CATALOG);
    rootDictionary.setItem(COSName.VERSION, COSName.getPDFName("1.4"));

    // next we need the pages tree structure, initially without any kids
    COSDictionary pages = new COSDictionary();
    rootDictionary.setItem(COSName.PAGES, pages);
    pages.setItem(COSName.TYPE, COSName.PAGES);
    COSArray kidsArray = new COSArray();
    pages.setItem(COSName.KIDS, kidsArray);
    pages.setItem(COSName.COUNT, COSInteger.ZERO);
}
/**
 * Appends a page to this document. Convenience method that hands the page to the page tree
 * returned by {@code getPages()}.
 *
 * @param page The page to add to the document.
 */
public void addPage(PDPage page)
{
    this.getPages().add(page);
}
/**
 * Adds a signature using freshly created default {@link SignatureOptions}.
 *
 * @param sigObject the signature dictionary model to add
 * @param signatureInterface is an interface which provides signing capabilities
 * @throws IOException if there is an error creating required fields
 */
public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface) throws IOException
{
    SignatureOptions defaultOptions = new SignatureOptions();
    addSignature(sigObject, signatureInterface, defaultOptions);
}
/**
 * This will add a signature to the document. If the 0-based page number in the options
 * parameter is smaller than 0 or larger than max, the nearest valid page number will be used
 * (i.e. 0 or max) and no exception will be thrown.
 *
 * @param sigObject the signature dictionary model to add
 * @param signatureInterface is an interface which provides signing capabilities
 * @param options signature options; must not be null
 * @throws IOException if there is an error creating required fields
 * @throws IllegalStateException if the document has no pages
 * @throws IllegalArgumentException if a visual signature template is given but lacks the
 * required annotation or signature field objects
 */
public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface,
        SignatureOptions options) throws IOException
{
    // Reserve content
    // We need to reserve some space for the signature. Some signatures include a
    // big certificate chain and we need enough space to store it.
    int preferredSignatureSize = options.getPreferredSignatureSize();
    if (preferredSignatureSize > 0)
    {
        sigObject.setContents(new byte[preferredSignatureSize]);
    }
    else
    {
        sigObject.setContents(new byte[SignatureOptions.DEFAULT_SIGNATURE_SIZE]);
    }

    // Reserve ByteRange with placeholder values
    sigObject.setByteRange(new int[] { 0, 1000000000, 1000000000, 1000000000 });

    signInterface = signatureInterface;

    // Create SignatureForm for signature and append it to the document

    // Get the first valid page, clamping the requested index into [0, pageCount - 1]
    int pageCount = getNumberOfPages();
    if (pageCount == 0)
    {
        throw new IllegalStateException("Cannot sign an empty document");
    }
    int startIndex = Math.min(Math.max(options.getPage(), 0), pageCount - 1);
    PDPage page = getPage(startIndex);

    // Get the AcroForm from the Root-Dictionary and append the annotation
    PDDocumentCatalog catalog = getDocumentCatalog();
    PDAcroForm acroForm = catalog.getAcroForm();
    catalog.getCOSObject().setNeedToBeUpdated(true);

    if (acroForm == null)
    {
        acroForm = new PDAcroForm(this);
        catalog.setAcroForm(acroForm);
    }
    else
    {
        acroForm.getCOSObject().setNeedToBeUpdated(true);
    }

    List<PDField> fields = acroForm.getFields();
    if (fields == null)
    {
        fields = new ArrayList<PDField>();
        acroForm.setFields(fields);
    }
    else
    {
        // Only mark the /Fields array when it is really present as an array; the previous
        // unchecked cast and dereference failed when the entry was absent or of another type.
        COSBase fieldArray = acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS);
        if (fieldArray instanceof COSArray)
        {
            ((COSArray) fieldArray).setNeedToBeUpdated(true);
        }
    }

    PDSignatureField signatureField = findSignatureField(fields, sigObject);
    if (signatureField == null)
    {
        signatureField = new PDSignatureField(acroForm);
        // append the signature object
        signatureField.setValue(sigObject);
        // backward linking
        signatureField.getWidgets().get(0).setPage(page);
    }

    // to conform PDF/A-1 requirement:
    // The /F key's Print flag bit shall be set to 1 and
    // its Hidden, Invisible and NoView flag bits shall be set to 0
    signatureField.getWidgets().get(0).setPrinted(true);

    // Set the AcroForm Fields (fetched again after a possible setFields() above)
    List<PDField> acroFormFields = acroForm.getFields();
    acroForm.getCOSObject().setDirect(true);
    acroForm.setSignaturesExist(true);
    acroForm.setAppendOnly(true);

    boolean checkFields = checkSignatureField(acroFormFields, signatureField);

    // Get the object from the visual signature
    COSDocument visualSignature = options.getVisualSignature();

    // Distinction of case for visual and non-visual signature
    if (visualSignature == null)
    {
        prepareNonVisibleSignature(signatureField);
        return;
    }

    prepareVisibleSignature(signatureField, acroForm, visualSignature);

    // Create Annotation / Field for signature
    List<PDAnnotation> annotations = page.getAnnotations();

    // Make /Annots a direct object to avoid problem if it is an existing indirect object:
    // it would not be updated in incremental save, and if we'd set the /Annots array "to be updated"
    // while keeping it indirect, Adobe Reader would claim that the document had been modified.
    page.setAnnotations(annotations);

    // Get the annotations of the page and append the signature-annotation to it
    // take care that page and acroforms do not share the same array (if so, we don't need to add it twice)
    if (!(annotations instanceof COSArrayList &&
          acroFormFields instanceof COSArrayList &&
          ((COSArrayList) annotations).toList().equals(((COSArrayList) acroFormFields).toList()) &&
          checkFields))
    {
        annotations.add(signatureField.getWidgets().get(0));
    }
    page.getCOSObject().setNeedToBeUpdated(true);
}
/**
 * Searches the AcroForm field list for a signature field whose signature dictionary equals
 * the COS object of the given signature. If several fields match, the last one wins.
 *
 * @param fields the fields to inspect
 * @param sigObject the signature to look for
 * @return the matching signature field, or null if none matches
 */
private PDSignatureField findSignatureField(List<PDField> fields, PDSignature sigObject)
{
    PDSignatureField signatureField = null;
    for (PDField pdField : fields)
    {
        if (pdField instanceof PDSignatureField)
        {
            PDSignature signature = ((PDSignatureField) pdField).getSignature();
            if (signature != null && signature.getCOSObject().equals(sigObject.getCOSObject()))
            {
                signatureField = (PDSignatureField) pdField;
            }
        }
    }
    return signatureField;
}
/**
 * Checks whether the signature field is already part of the given field list. An existing
 * field is marked for update; a missing one is appended to the list.
 *
 * @param acroFormFields the AcroForm fields to inspect and possibly extend
 * @param signatureField the signature field to look for
 * @return true if the field already existed in the field list
 */
private boolean checkSignatureField(List<PDField> acroFormFields, PDSignatureField signatureField)
{
    boolean checkFields = false;
    for (PDField field : acroFormFields)
    {
        if (field instanceof PDSignatureField
                && field.getCOSObject().equals(signatureField.getCOSObject()))
        {
            checkFields = true;
            signatureField.getCOSObject().setNeedToBeUpdated(true);
            break;
        }
        // fixme: this code does not check non-terminal fields, there could be a descendant signature
    }
    if (!checkFields)
    {
        acroFormFields.add(signatureField);
    }
    return checkFields;
}
/**
 * Scans the objects of the visual signature template for the first annotation dictionary and
 * the first signature field dictionary that has an appearance dictionary, and transfers
 * rectangle, appearance and default resources to the given field and AcroForm.
 *
 * @param signatureField the signature field to configure
 * @param acroForm the AcroForm receiving the default resources
 * @param visualSignature the template document holding the visual signature
 * @throws IllegalArgumentException if one of the two required objects is not found
 */
private void prepareVisibleSignature(PDSignatureField signatureField, PDAcroForm acroForm,
        COSDocument visualSignature)
{
    boolean rectangleAssigned = false;
    boolean appearanceAssigned = false;
    for (COSObject templateObject : visualSignature.getObjects())
    {
        // stop scanning as soon as both parts have been picked up
        if (rectangleAssigned && appearanceAssigned)
        {
            break;
        }

        COSBase resolved = templateObject.getObject();
        if (!(resolved instanceof COSDictionary))
        {
            continue;
        }
        COSDictionary candidate = (COSDictionary) resolved;

        // signature annotation: only the first one is used
        COSBase type = candidate.getDictionaryObject(COSName.TYPE);
        if (!rectangleAssigned && COSName.ANNOT.equals(type))
        {
            assignSignatureRectangle(signatureField, candidate);
            rectangleAssigned = true;
        }

        // signature field: only the first one with an appearance dictionary is used
        COSBase fieldType = candidate.getDictionaryObject(COSName.FT);
        COSBase appearance = candidate.getDictionaryObject(COSName.AP);
        if (!appearanceAssigned && COSName.SIG.equals(fieldType)
                && appearance instanceof COSDictionary)
        {
            assignAppearanceDictionary(signatureField, (COSDictionary) appearance);
            assignAcroFormDefaultResource(acroForm, candidate);
            appearanceAssigned = true;
        }
    }

    if (!(rectangleAssigned && appearanceAssigned))
    {
        throw new IllegalArgumentException("Template is missing required objects");
    }
}
/**
 * Copies the /Rect entry of the annotation dictionary to the first widget of the field.
 *
 * @param signatureField the signature field whose first widget gets the rectangle
 * @param annotDict the annotation dictionary of the visual signature
 */
private void assignSignatureRectangle(PDSignatureField signatureField, COSDictionary annotDict)
{
    COSBase rectEntry = annotDict.getDictionaryObject(COSName.RECT);
    PDRectangle visualRect = new PDRectangle((COSArray) rectEntry);
    signatureField.getWidgets().get(0).setRectangle(visualRect);
}
/**
 * Wraps the given appearance dictionary, marks it as direct and sets it on the first widget
 * of the signature field.
 *
 * @param signatureField the signature field whose first widget gets the appearance
 * @param apDict the appearance dictionary of the visual signature
 */
private void assignAppearanceDictionary(PDSignatureField signatureField, COSDictionary apDict)
{
    PDAppearanceDictionary appearance = new PDAppearanceDictionary(apDict);
    apDict.setDirect(true);
    signatureField.getWidgets().get(0).setAppearance(appearance);
}
/**
 * Transfers the default resource dictionary /DR of the given dictionary to the AcroForm.
 * Nothing happens if the entry is absent or not a dictionary.
 *
 * @param acroForm the AcroForm receiving the default resources
 * @param dict the dictionary to read /DR from
 */
private void assignAcroFormDefaultResource(PDAcroForm acroForm, COSDictionary dict)
{
    COSBase drEntry = dict.getDictionaryObject(COSName.DR);
    if (!(drEntry instanceof COSDictionary))
    {
        return;
    }
    COSDictionary defaultResources = (COSDictionary) drEntry;
    defaultResources.setDirect(true);
    defaultResources.setNeedToBeUpdated(true);
    acroForm.getCOSObject().setItem(COSName.DR, defaultResources);
}
/**
 * Gives the first widget of the field a default-constructed rectangle.
 *
 * "Signature fields that are not intended to be visible shall
 * have an annotation rectangle that has zero height and width."
 *
 * @param signatureField the signature field to make non-visible
 * @throws IOException declared by the signature
 */
private void prepareNonVisibleSignature(PDSignatureField signatureField)
        throws IOException
{
    // rectangle array [ 0 0 0 0 ] for the non-visual signature
    PDRectangle emptyRectangle = new PDRectangle();
    signatureField.getWidgets().get(0).setRectangle(emptyRectangle);
}
/**
 * This will add signature fields to the document. Every field is registered in the AcroForm;
 * for fields that already carry a signature, the signature is added to the document as well.
 *
 * @param sigFields are the PDSignatureFields that should be added to the document
 * @param signatureInterface is an interface which provides signing capabilities
 * @param options signature options; if null, default {@link SignatureOptions} are used
 * @throws IOException if there is an error creating required fields
 */
public void addSignatureField(List<PDSignatureField> sigFields, SignatureInterface signatureInterface,
        SignatureOptions options) throws IOException
{
    PDDocumentCatalog catalog = getDocumentCatalog();
    catalog.getCOSObject().setNeedToBeUpdated(true);

    PDAcroForm acroForm = catalog.getAcroForm();
    if (acroForm == null)
    {
        acroForm = new PDAcroForm(this);
        catalog.setAcroForm(acroForm);
    }
    COSDictionary acroFormDict = acroForm.getCOSObject();
    acroFormDict.setDirect(true);
    acroFormDict.setNeedToBeUpdated(true);
    if (!acroForm.isSignaturesExist())
    {
        // 1 if at least one signature field is available
        acroForm.setSignaturesExist(true);
    }

    // addSignature() dereferences the options, so substitute defaults for a null argument
    // instead of failing with a NullPointerException
    SignatureOptions effectiveOptions = (options != null) ? options : new SignatureOptions();

    List<PDField> acroformFields = acroForm.getFields();

    for (PDSignatureField sigField : sigFields)
    {
        sigField.getCOSObject().setNeedToBeUpdated(true);

        // Check if the field already exists; it is appended to the AcroForm otherwise
        checkSignatureField(acroformFields, sigField);

        // Check if we need to add a signature
        PDSignature signature = sigField.getSignature();
        if (signature != null)
        {
            addSignature(signature, signatureInterface, effectiveOptions);
        }
    }
}
/**
 * Removes the given page from the page tree of this document.
 *
 * @param page The page to remove from the document.
 */
public void removePage(PDPage page)
{
    this.getPages().remove(page);
}
/**
 * Removes the page at the given position from the page tree of this document.
 *
 * @param pageNumber 0 based index to page number.
 */
public void removePage(int pageNumber)
{
    this.getPages().remove(pageNumber);
}
/**
 * Imports a page by copying its contents into this document. Currently the content stream is
 * stored in a scratch file that is associated with the document. Use this method when adding
 * a page from another document whose contents should end up in this document's scratch file;
 * otherwise just use the {@link #addPage} method.
 *
 * Unlike {@link #addPage}, this method does a deep clone. This will be slower and have a larger
 * memory footprint. However the deep clone is important to avoid resources getting lost if the
 * source document is closed when the destination document is saved.
 *
 * If your page has annotations, and if these link to pages not in the target document, then the
 * target document might become huge. To avoid this, delete the page references of such
 * annotations.
 *
 * @param page The page to import.
 * @return The page that was imported.
 *
 * @throws IOException If there is an error copying the page.
 */
public PDPage importPage(PDPage page) throws IOException
{
    COSBase clonedPage = new PDFCloneUtility(this).cloneForNewDocument(page.getCOSObject());
    PDPage copy = new PDPage((COSDictionary) clonedPage, resourceCache);
    addPage(copy);
    return copy;
}
/**
* Constructor that uses an existing document. The COSDocument that is passed in must be valid.
* No source is associated with the new instance.
*
* @param doc The COSDocument that this document wraps.
*/
public PDDocument(COSDocument doc)
{
// no RandomAccessRead source is known for this document
this(doc, null);
}
/**
* Constructor that uses an existing document. The COSDocument that is passed in must be valid.
* No access permission is associated with the new instance.
*
* @param doc The COSDocument that this document wraps.
* @param source the source from which the pdf is read
*/
public PDDocument(COSDocument doc, RandomAccessRead source)
{
// no access permission is passed on
this(doc, source, null);
}
/**
 * Constructor that uses an existing document. The COSDocument that is passed in must be valid.
 *
 * @param doc The COSDocument that this document wraps.
 * @param source the source from which the pdf is read
 * @param permission the access permissions of the pdf
 */
public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission permission)
{
    this.document = doc;
    this.pdfSource = source;
    this.accessPermission = permission;
}
/**
 * Gives access to the low level COS document wrapped by this object.
 *
 * @return The document that this layer sits on top of.
 */
public COSDocument getDocument()
{
    return this.document;
}
/**
 * This will get the document info dictionary. This is guaranteed to not return null.
 *
 * <p>The result is cached. If the trailer has no /Info entry, or the entry is not a
 * dictionary, a new empty dictionary is stored in the trailer and used instead.
 *
 * @return The documents /Info dictionary
 */
public PDDocumentInformation getDocumentInformation()
{
    if (documentInformation == null)
    {
        COSDictionary trailer = document.getTrailer();
        COSBase infoBase = trailer.getDictionaryObject(COSName.INFO);
        COSDictionary infoDic;
        if (infoBase instanceof COSDictionary)
        {
            infoDic = (COSDictionary) infoBase;
        }
        else
        {
            // missing or malformed entry: same instanceof guard as getDocumentCatalog(),
            // so a wrong object type no longer ends in a ClassCastException
            infoDic = new COSDictionary();
            trailer.setItem(COSName.INFO, infoDic);
        }
        documentInformation = new PDDocumentInformation(infoDic);
    }
    return documentInformation;
}
/**
 * Replaces the cached document information and stores its COS object as the /Info entry
 * of the trailer.
 *
 * @param info The updated document information.
 */
public void setDocumentInformation(PDDocumentInformation info)
{
    this.documentInformation = info;
    COSDictionary trailer = this.document.getTrailer();
    trailer.setItem(COSName.INFO, info.getCOSObject());
}
/**
 * This will get the document CATALOG. This is guaranteed to not return null.
 * The catalog is created on first access and cached afterwards; if the trailer's /Root entry
 * is not a dictionary, a new catalog is created for this document.
 *
 * @return The documents /Root dictionary
 */
public PDDocumentCatalog getDocumentCatalog()
{
    if (documentCatalog != null)
    {
        return documentCatalog;
    }
    COSBase root = document.getTrailer().getDictionaryObject(COSName.ROOT);
    documentCatalog = (root instanceof COSDictionary)
            ? new PDDocumentCatalog(this, (COSDictionary) root)
            : new PDDocumentCatalog(this);
    return documentCatalog;
}
/**
 * Tells whether the underlying COS document reports itself as encrypted.
 *
 * @return true If this document is encrypted.
 */
public boolean isEncrypted()
{
    return this.document.isEncrypted();
}
/**
 * This will get the encryption dictionary for this document. This will still return the
 * parameters if the document was decrypted, because the value is cached in this object.
 * As the encryption architecture in PDF documents is pluggable this returns an abstract class,
 * but the only supported subclass at this time is a PDStandardEncryption object.
 *
 * @return The encryption dictionary (most likely a PDStandardEncryption object), or null if
 * nothing is cached and the document is not encrypted
 */
public PDEncryption getEncryption()
{
    if (encryption != null)
    {
        return encryption;
    }
    if (isEncrypted())
    {
        encryption = new PDEncryption(document.getEncryptionDictionary());
    }
    return encryption;
}
/**
* This will set the encryption dictionary for this document. Only the value cached in this
* object is replaced.
*
* @param encryption The encryption dictionary (most likely a PDStandardEncryption object)
*
* @throws IOException declared by the signature; the visible body does not throw it.
*/
public void setEncryptionDictionary(PDEncryption encryption) throws IOException
{
this.encryption = encryption;
}
/**
 * This will return the last signature dictionary of the document.
 *
 * @return the last entry of {@link #getSignatureDictionaries()} as <code>PDSignature</code>,
 * or null if the document has no signature dictionary
 * @throws IOException if no document catalog can be found.
 */
public PDSignature getLastSignatureDictionary() throws IOException
{
    // typed list: a raw List would only hand out Object, which is not assignable to PDSignature
    List<PDSignature> signatureDictionaries = getSignatureDictionaries();
    int size = signatureDictionaries.size();
    if (size > 0)
    {
        return signatureDictionaries.get(size - 1);
    }
    return null;
}
/**
 * Retrieve all signature fields from the document by walking the field tree of the AcroForm.
 *
 * @return a <code>List</code> of <code>PDSignatureField</code>s; empty if the document has no
 * AcroForm or no signature field
 * @throws IOException if no document catalog can be found.
 */
public List<PDSignatureField> getSignatureFields() throws IOException
{
    List<PDSignatureField> fields = new ArrayList<PDSignatureField>();
    PDAcroForm acroForm = getDocumentCatalog().getAcroForm();
    if (acroForm != null)
    {
        for (PDField field : acroForm.getFieldTree())
        {
            if (field instanceof PDSignatureField)
            {
                fields.add((PDSignatureField) field);
            }
        }
    }
    return fields;
}
/**
 * Retrieve all signature dictionaries from the document. A signature is reported for every
 * signature field whose /V entry is a dictionary; other fields are skipped.
 *
 * @return a <code>List</code> of <code>PDSignature</code>s
 * @throws IOException if no document catalog can be found.
 */
public List<PDSignature> getSignatureDictionaries() throws IOException
{
    List<PDSignature> signatures = new ArrayList<PDSignature>();
    List<PDSignatureField> signatureFields = getSignatureFields();
    for (PDSignatureField field : signatureFields)
    {
        COSBase value = field.getCOSObject().getDictionaryObject(COSName.V);
        // instanceof also covers null and avoids a ClassCastException on a malformed /V entry
        if (value instanceof COSDictionary)
        {
            signatures.add(new PDSignature((COSDictionary) value));
        }
    }
    return signatures;
}
/**
 * Returns the set of fonts which will be subset before the document is saved. The internal
 * set itself is returned, not a copy.
 *
 * @return the fonts registered for subsetting
 */
Set<PDFont> getFontsToSubset()
{
    return fontsToSubset;
}
/**
 * Parses a PDF file with an empty password. Unrestricted main memory will be used for
 * buffering PDF streams.
 *
 * @param file file to be loaded
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(File file) throws IOException
{
    MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
    return load(file, "", mainMemoryOnly);
}
/**
 * Parses a PDF file with an empty password and without key store or alias.
 *
 * @param file file to be loaded
 * @param memUsageSetting defines how memory is used for buffering PDF streams
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(File file, MemoryUsageSetting memUsageSetting) throws IOException
{
    final String emptyPassword = "";
    return load(file, emptyPassword, null, null, memUsageSetting);
}
/**
 * Parses a PDF file without key store or alias. Unrestricted main memory will be used for
 * buffering PDF streams.
 *
 * @param file file to be loaded
 * @param password password to be used for decryption
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(File file, String password) throws IOException
{
    MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
    return load(file, password, null, null, mainMemoryOnly);
}
/**
 * Parses a PDF file without key store or alias.
 *
 * @param file file to be loaded
 * @param password password to be used for decryption
 * @param memUsageSetting defines how memory is used for buffering PDF streams
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(File file, String password, MemoryUsageSetting memUsageSetting) throws IOException
{
    final InputStream noKeyStore = null;
    final String noAlias = null;
    return load(file, password, noKeyStore, noAlias, memUsageSetting);
}
/**
 * Parses a PDF file. Unrestricted main memory will be used for buffering PDF streams.
 *
 * @param file file to be loaded
 * @param password password to be used for decryption
 * @param keyStore key store to be used for decryption when using public key security
 * @param alias alias to be used for decryption when using public key security
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(File file, String password, InputStream keyStore, String alias)
        throws IOException
{
    MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
    return load(file, password, keyStore, alias, mainMemoryOnly);
}
/**
 * Parses a PDF file.
 *
 * <p>If loading fails for any reason, including unchecked exceptions raised while parsing,
 * the opened file and the scratch file are closed quietly before the failure is propagated.
 * On success both stay open for use by the returned document.
 *
 * @param file file to be loaded
 * @param password password to be used for decryption
 * @param keyStore key store to be used for decryption when using public key security
 * @param alias alias to be used for decryption when using public key security
 * @param memUsageSetting defines how memory is used for buffering PDF streams
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(File file, String password, InputStream keyStore, String alias,
        MemoryUsageSetting memUsageSetting) throws IOException
{
    RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(file);
    ScratchFile scratchFile = null;
    boolean loaded = false;
    try
    {
        scratchFile = new ScratchFile(memUsageSetting);
        PDFParser parser = new PDFParser(raFile, password, keyStore, alias, scratchFile);
        parser.parse();
        PDDocument loadedDocument = parser.getPDDocument();
        loaded = true;
        return loadedDocument;
    }
    finally
    {
        // release the resources on every failure path, not only on IOException
        if (!loaded)
        {
            if (scratchFile != null)
            {
                IOUtils.closeQuietly(scratchFile);
            }
            IOUtils.closeQuietly(raFile);
        }
    }
}
/**
 * Parses a PDF from a stream with an empty password. The given input stream is copied to the
 * memory to enable random access to the pdf.
 * Unrestricted main memory will be used for buffering PDF streams.
 *
 * @param input stream that contains the document.
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(InputStream input) throws IOException
{
    MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
    return load(input, "", null, null, mainMemoryOnly);
}
/**
 * Parses a PDF from a stream with an empty password. Depending on the memory settings
 * parameter the given input stream is either copied to main memory or to a temporary file
 * to enable random access to the pdf.
 *
 * @param input stream that contains the document.
 * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(InputStream input, MemoryUsageSetting memUsageSetting) throws IOException
{
    final String emptyPassword = "";
    return load(input, emptyPassword, null, null, memUsageSetting);
}
/**
 * Parses a PDF from a stream without key store or alias. The given input stream is copied to
 * the memory to enable random access to the pdf.
 * Unrestricted main memory will be used for buffering PDF streams.
 *
 * @param input stream that contains the document.
 * @param password password to be used for decryption
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(InputStream input, String password)
        throws IOException
{
    MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
    return load(input, password, null, null, mainMemoryOnly);
}
/**
 * Parses a PDF from a stream. The given input stream is copied to the memory to enable
 * random access to the pdf.
 * Unrestricted main memory will be used for buffering PDF streams.
 *
 * @param input stream that contains the document.
 * @param password password to be used for decryption
 * @param keyStore key store to be used for decryption when using public key security
 * @param alias alias to be used for decryption when using public key security
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias)
        throws IOException
{
    MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
    return load(input, password, keyStore, alias, mainMemoryOnly);
}
/**
 * Parses a PDF from a stream without key store or alias. Depending on the memory settings
 * parameter the given input stream is either copied to main memory or to a temporary file
 * to enable random access to the pdf.
 *
 * @param input stream that contains the document.
 * @param password password to be used for decryption
 * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(InputStream input, String password, MemoryUsageSetting memUsageSetting)
        throws IOException
{
    final InputStream noKeyStore = null;
    final String noAlias = null;
    return load(input, password, noKeyStore, noAlias, memUsageSetting);
}
/**
 * Parses a PDF. Depending on the memory settings parameter the given input
 * stream is either copied to memory or to a temporary file to enable
 * random access to the pdf.
 *
 * <p>If loading fails for any reason, including unchecked exceptions raised while parsing,
 * the scratch file is closed quietly before the failure is propagated.
 *
 * @param input stream that contains the document.
 * @param password password to be used for decryption
 * @param keyStore key store to be used for decryption when using public key security
 * @param alias alias to be used for decryption when using public key security
 * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(InputStream input, String password, InputStream keyStore,
        String alias, MemoryUsageSetting memUsageSetting) throws IOException
{
    ScratchFile scratchFile = new ScratchFile(memUsageSetting);
    boolean loaded = false;
    try
    {
        RandomAccessRead source = scratchFile.createBuffer(input);
        PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile);
        parser.parse();
        PDDocument loadedDocument = parser.getPDDocument();
        loaded = true;
        return loadedDocument;
    }
    finally
    {
        // release the scratch file on every failure path, not only on IOException
        if (!loaded)
        {
            IOUtils.closeQuietly(scratchFile);
        }
    }
}
/**
 * Parses a PDF from a byte array with an empty password. Unrestricted main memory will be
 * used for buffering PDF streams.
 *
 * @param input byte array that contains the document.
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(byte[] input) throws IOException
{
    final String emptyPassword = "";
    return load(input, emptyPassword);
}
/**
 * Parses a PDF from a byte array without key store or alias. Unrestricted main memory will
 * be used for buffering PDF streams.
 *
 * @param input byte array that contains the document.
 * @param password password to be used for decryption
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(byte[] input, String password) throws IOException
{
    final InputStream noKeyStore = null;
    final String noAlias = null;
    return load(input, password, noKeyStore, noAlias);
}
/**
 * Parses a PDF from a byte array. Unrestricted main memory will be used for buffering PDF
 * streams.
 *
 * @param input byte array that contains the document.
 * @param password password to be used for decryption
 * @param keyStore key store to be used for decryption when using public key security
 * @param alias alias to be used for decryption when using public key security
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(byte[] input, String password, InputStream keyStore,
        String alias) throws IOException
{
    MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
    return load(input, password, keyStore, alias, mainMemoryOnly);
}
/**
 * Parses a PDF from a byte array.
 *
 * <p>If loading fails for any reason, the scratch file is closed quietly before the failure
 * is propagated, in line with the other {@code load} variants.
 *
 * @param input byte array that contains the document.
 * @param password password to be used for decryption
 * @param keyStore key store to be used for decryption when using public key security
 * @param alias alias to be used for decryption when using public key security
 * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams
 *
 * @return loaded document
 *
 * @throws IOException in case of a file reading or parsing error
 */
public static PDDocument load(byte[] input, String password, InputStream keyStore,
        String alias, MemoryUsageSetting memUsageSetting) throws IOException
{
    ScratchFile scratchFile = new ScratchFile(memUsageSetting);
    boolean loaded = false;
    try
    {
        RandomAccessRead source = new RandomAccessBuffer(input);
        PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile);
        parser.parse();
        PDDocument loadedDocument = parser.getPDDocument();
        loaded = true;
        return loadedDocument;
    }
    finally
    {
        // previously the scratch file was never released when parsing failed
        if (!loaded)
        {
            IOUtils.closeQuietly(scratchFile);
        }
    }
}
/**
 * Save the document to the file with the given name.
 *
 * @param fileName The file to save as.
 *
 * @throws IOException if the output could not be written
 */
public void save(String fileName) throws IOException
{
    final File target = new File(fileName);
    save(target);
}
/**
 * Save the document to a file.
 * <p>
 * The file is opened through a buffered stream which is always closed before this method
 * returns, including when saving fails before anything has been written (for example because
 * the document has already been closed).
 *
 * @param file The file to save as.
 *
 * @throws IOException if the output could not be written
 */
public void save(File file) throws IOException
{
    OutputStream output = new BufferedOutputStream(new FileOutputStream(file));
    try
    {
        save(output);
    }
    finally
    {
        // save(OutputStream) can throw before any writer wraps the stream; without this the
        // file handle would stay open. Closing a BufferedOutputStream over a FileOutputStream
        // a second time is harmless should the save have closed it already.
        output.close();
    }
}
/**
 * This will save the document to an output stream.
 * <p>
 * Fonts that were designated for subsetting are subset first and the pending list is cleared,
 * then the document is written with a {@code COSWriter}. The writer is closed exactly once,
 * whether or not writing succeeded.
 *
 * @param output The stream to write to.
 *
 * @throws IOException if the document has already been closed or the output could not be
 * written
 */
public void save(OutputStream output) throws IOException
{
    if (document.isClosed())
    {
        throw new IOException("Cannot save a document which has been closed");
    }

    // subset designated fonts
    for (PDFont font : fontsToSubset)
    {
        font.subset();
    }
    fontsToSubset.clear();

    // save PDF
    COSWriter writer = new COSWriter(output);
    try
    {
        writer.write(this);
    }
    finally
    {
        // single close for both the success and the failure path; closing in the try block
        // as well would close the writer twice after a successful write
        writer.close();
    }
}
/**
 * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a
 * file or a stream, not if the document was created in PDFBox itself.
 * <p>
 * The writer is closed exactly once, whether or not writing succeeded.
 *
 * @param output stream to write
 * @throws IOException if the output could not be written
 * @throws IllegalStateException if the document was not loaded from a file or a stream.
 */
public void saveIncremental(OutputStream output) throws IOException
{
    // an incremental update needs the original source to append to
    if (pdfSource == null)
    {
        throw new IllegalStateException("document was not loaded from a file or a stream");
    }

    COSWriter writer = new COSWriter(output, pdfSource);
    try
    {
        writer.write(this, signInterface);
    }
    finally
    {
        // single close for both the success and the failure path
        writer.close();
    }
}
/**
 * Returns the page at the given index, looked up in the page tree of the document catalog.
 *
 * @param pageIndex the page index
 * @return the page at the given index.
 */
public PDPage getPage(int pageIndex)
{
    // TODO: replace most calls to this method with getPages()
    final PDPage page = getDocumentCatalog().getPages().get(pageIndex);
    return page;
}
/**
 * Returns the page tree of the document catalog.
 *
 * @return the page tree
 */
public PDPageTree getPages()
{
    final PDPageTree pageTree = getDocumentCatalog().getPages();
    return pageTree;
}
/**
 * This will return the total page count of the PDF document, as reported by the page tree of
 * the document catalog.
 *
 * @return The total number of pages in the PDF document.
 */
public int getNumberOfPages()
{
    final int pageCount = getDocumentCatalog().getPages().getCount();
    return pageCount;
}
/**
 * This will close the underlying COSDocument object and, if the document was read from one,
 * the source PDF stream. Calling this on an already closed document does nothing.
 * <p>
 * The source stream is closed even if closing the COSDocument fails.
 *
 * @throws IOException If there is an error releasing resources.
 */
@Override
public void close() throws IOException
{
    if (document.isClosed())
    {
        return;
    }

    try
    {
        // close all intermediate I/O streams
        document.close();
    }
    finally
    {
        // close the source PDF stream, if we read from one; done in finally so that a
        // failure above does not leave the source open
        if (pdfSource != null)
        {
            pdfSource.close();
        }
    }
}
/**
 * Protects the document with a protection policy. This method only marks the document for
 * encryption by installing a security handler for the policy; the content itself is encrypted
 * when the document is saved. If {@link #setAllSecurityToBeRemoved(boolean)} was previously
 * called with true, a warning is logged and the flag is reset to false.
 *
 * @see org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy
 * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy
 *
 * @param policy The protection policy.
 * @throws IOException if there isn't any suitable security handler.
 */
public void protect(ProtectionPolicy policy) throws IOException
{
    // protecting and removing all security contradict each other; protecting wins
    if (isAllSecurityToBeRemoved())
    {
        LOG.warn("do not call setAllSecurityToBeRemoved(true) before calling protect(), "
                + "as protect() implies setAllSecurityToBeRemoved(false)");
        setAllSecurityToBeRemoved(false);
    }

    // an unencrypted document needs a fresh encryption object to hold the handler
    final boolean alreadyEncrypted = isEncrypted();
    if (!alreadyEncrypted)
    {
        encryption = new PDEncryption();
    }

    final SecurityHandler handler =
            SecurityHandlerFactory.INSTANCE.newSecurityHandlerForPolicy(policy);
    if (handler != null)
    {
        getEncryption().setSecurityHandler(handler);
        return;
    }
    throw new IOException("No security handler for policy " + policy);
}
/**
 * Returns the access permissions for the current user on the document. If no permission
 * object is present yet, the owner access permission (ie can do everything) is stored and
 * returned. Methods providing access to content should rely on this object to verify if the
 * current user is allowed to proceed.
 * <p>
 * NOTE(review): earlier documentation describes the returned object as being in read only
 * mode; nothing in this method enforces that, so confirm against {@code AccessPermission}.
 *
 * @return the access permissions for the current user on the document.
 */
public AccessPermission getCurrentAccessPermission()
{
    if (accessPermission != null)
    {
        return accessPermission;
    }
    // nothing stored yet: fall back to the owner permission and remember it
    accessPermission = AccessPermission.getOwnerAccessPermission();
    return accessPermission;
}
/**
 * Indicates whether all security is to be removed when writing the pdf.
 *
 * @return true if all security shall be removed, otherwise false
 */
public boolean isAllSecurityToBeRemoved()
{
    return this.allSecurityToBeRemoved;
}
/**
 * Activates or deactivates the removal of all security when writing the pdf.
 *
 * @param removeAllSecurity remove all security if set to true
 */
public void setAllSecurityToBeRemoved(boolean removeAllSecurity)
{
    this.allSecurityToBeRemoved = removeAllSecurity;
}
/**
 * Provides the document ID.
 *
 * @return the document ID
 */
public Long getDocumentId()
{
    return this.documentId;
}
/**
 * Sets the document ID to the given value.
 *
 * @param docId the new document ID
 */
public void setDocumentId(Long docId)
{
    this.documentId = docId;
}
/**
 * Returns the PDF specification version this document conforms to.
 * <p>
 * The version from the document header is returned as is when it is not at least 1.4.
 * Otherwise the version entry of the document catalog is consulted as well and the larger of
 * the two values is returned; a missing or unparsable catalog entry never overrides the
 * header version.
 *
 * @return the PDF version (e.g. 1.4f)
 */
public float getVersion()
{
    final float headerVersion = getDocument().getVersion();

    // Only headers of 1.4 and above can be accompanied by a catalog version. The negated >=
    // (rather than <) keeps the outcome unchanged for a NaN header value.
    if (!(headerVersion >= 1.4f))
    {
        return headerVersion;
    }

    final String catalogVersionText = getDocumentCatalog().getVersion();
    // -1 loses against every header version that reaches this point
    float catalogVersion = -1;
    if (catalogVersionText != null)
    {
        try
        {
            catalogVersion = Float.parseFloat(catalogVersionText);
        }
        catch (NumberFormatException exception)
        {
            LOG.error("Can't extract the version number of the document catalog.", exception);
        }
    }

    // the most recent version is the correct one
    return Math.max(catalogVersion, headerVersion);
}
/**
 * Sets the PDF specification version for this document.
 * <p>
 * Nothing happens if the new version equals the current one. A lower version is rejected with
 * an error log entry, as the version can't be downgraded. Otherwise the new version is stored
 * in the document catalog if the header version is at least 1.4, and in the header itself if
 * it is lower.
 *
 * @param newVersion the new PDF version (e.g. 1.4f)
 *
 */
public void setVersion(float newVersion)
{
    final float existingVersion = getVersion();

    // nothing to do?
    if (newVersion == existingVersion)
    {
        return;
    }

    // the version can't be downgraded
    if (newVersion < existingVersion)
    {
        LOG.error("It's not allowed to downgrade the version of a pdf.");
        return;
    }

    final boolean headerAtLeast14 = getDocument().getVersion() >= 1.4f;
    if (!headerAtLeast14)
    {
        // versions < 1.4f have a version header only
        getDocument().setVersion(newVersion);
        return;
    }

    // update the catalog version if the document version is >= 1.4
    getDocumentCatalog().setVersion(Float.toString(newVersion));
}
/**
 * Returns the resource cache associated with this document, or null if there is none.
 *
 * @return the resource cache, or null if none has been set
 */
public ResourceCache getResourceCache()
{
    return this.resourceCache;
}
/**
* Sets the resource cache associated with this document, replacing any cache that was set
* before. Passing null leaves the document without a resource cache.
*
* @param resourceCache A resource cache, or null.
*/
public void setResourceCache(ResourceCache resourceCache)
{
// the parameter shadows the field, hence the explicit this qualifier
this.resourceCache = resourceCache;
}
}