All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.pdfbox.pdmodel.PDDocument Maven / Gradle / Ivy

Go to download

The Apache PDFBox library is an open source Java tool for working with PDF documents.

There is a newer version: 3.0.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel;

import java.awt.print.PageFormat;
import java.awt.print.Pageable;
import java.awt.print.Printable;
import java.awt.print.PrinterException;
import java.awt.print.PrinterJob;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.SignatureException;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.pdfparser.BaseParser;
import org.apache.pdfbox.pdfparser.NonSequentialPDFParser;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdfwriter.COSWriter;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
import org.apache.pdfbox.pdmodel.encryption.PDEncryptionDictionary;
import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandlersManager;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;

/**
 * This is the in-memory representation of the PDF document.  You need to call
 * close() on this object when you are done using it!!
 * 

* This class implements the {@link Pageable} interface, but since PDFBox * version 1.3.0 you should be using the {@link PDPageable} adapter instead * (see PDFBOX-788). * * @author Ben Litchfield * @version $Revision: 1.47 $ */ public class PDDocument implements Pageable, Closeable { /** * Log instance. */ private static final Log LOG = LogFactory.getLog(PDDocument.class); private COSDocument document; //cached values private PDDocumentInformation documentInformation; private PDDocumentCatalog documentCatalog; //The encParameters will be cached here. When the document is decrypted then //the COSDocument will not have an "Encrypt" dictionary anymore and this object //must be used. private PDEncryptionDictionary encParameters = null; /** * The security handler used to decrypt / encrypt the document. */ private SecurityHandler securityHandler = null; /** * This assocates object ids with a page number. It's used to determine * the page number for bookmarks (or page numbers for anything else for * which you have an object id for that matter). */ private Map pageMap = null; /** * This will hold a flag which tells us if we should remove all security * from this documents. */ private boolean allSecurityToBeRemoved = false; /** * Keep tracking customized documentId for the trailer. If null, a new * id will be generated for the document. This ID doesn't represent the * actual documentId from the trailer. */ private Long documentId; private BaseParser parser; /** * Constructor, creates a new PDF Document with no pages. You need to add * at least one page for the document to be valid. */ public PDDocument() { document = new COSDocument(); //First we need a trailer COSDictionary trailer = new COSDictionary(); document.setTrailer( trailer ); //Next we need the root dictionary. 
COSDictionary rootDictionary = new COSDictionary(); trailer.setItem( COSName.ROOT, rootDictionary ); rootDictionary.setItem( COSName.TYPE, COSName.CATALOG ); rootDictionary.setItem( COSName.VERSION, COSName.getPDFName( "1.4" ) ); //next we need the pages tree structure COSDictionary pages = new COSDictionary(); rootDictionary.setItem( COSName.PAGES, pages ); pages.setItem( COSName.TYPE, COSName.PAGES ); COSArray kidsArray = new COSArray(); pages.setItem( COSName.KIDS, kidsArray ); pages.setItem( COSName.COUNT, COSInteger.ZERO ); } private void generatePageMap() { pageMap = new HashMap(); // these page nodes could be references to pages, // or references to arrays which have references to pages // or references to arrays which have references to arrays which have references to pages // or ... (I think you get the idea...) processListOfPageReferences(getDocumentCatalog().getPages().getKids()); } private void processListOfPageReferences(List pageNodes) { int numberOfNodes = pageNodes.size(); for(int i=0; i < numberOfNodes; ++i) { Object pageOrArray = pageNodes.get(i); if(pageOrArray instanceof PDPage) { COSArray pageArray = ((COSArrayList)(((PDPage)pageOrArray).getParent()).getKids()).toList(); parseCatalogObject((COSObject)pageArray.get(i)); } else if(pageOrArray instanceof PDPageNode) { processListOfPageReferences(((PDPageNode)pageOrArray).getKids()); } } } /** * This will either add the page passed in, or, if it's a pointer to an array * of pages, it'll recursivly call itself and process everything in the list. 
*/ private void parseCatalogObject(COSObject thePageOrArrayObject) { COSBase arrayCountBase = thePageOrArrayObject.getItem(COSName.COUNT); int arrayCount = -1; if(arrayCountBase instanceof COSInteger) { arrayCount = ((COSInteger)arrayCountBase).intValue(); } COSBase kidsBase = thePageOrArrayObject.getItem(COSName.KIDS); int kidsCount = -1; if(kidsBase instanceof COSArray) { kidsCount = ((COSArray)kidsBase).size(); } if(arrayCount == -1 || kidsCount == -1) { // these cases occur when we have a page, not an array of pages String objStr = String.valueOf(thePageOrArrayObject.getObjectNumber().intValue()); String genStr = String.valueOf(thePageOrArrayObject.getGenerationNumber().intValue()); getPageMap().put(objStr+","+genStr, getPageMap().size()+1); } else { // we either have an array of page pointers, or an array of arrays if(arrayCount == kidsCount) { // process the kids... they're all references to pages COSArray kidsArray = ((COSArray)kidsBase); for(int i=0; i getPageMap() { if (pageMap == null) { generatePageMap(); } return pageMap; } /** * This will add a page to the document. This is a convenience method, that * will add the page to the root of the hierarchy and set the parent of the * page to the root. * * @param page The page to add to the document. */ public void addPage( PDPage page ) { PDPageNode rootPages = getDocumentCatalog().getPages(); rootPages.getKids().add( page ); page.setParent( rootPages ); rootPages.updateCount(); } /** * Add a signature. 
* * @param sigObject is the PDSignature model * @param signatureInterface is a interface which provides signing capabilities * @throws IOException if there is an error creating required fields * @throws SignatureException if something went wrong */ public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface) throws IOException, SignatureException { SignatureOptions defaultOptions = new SignatureOptions(); defaultOptions.setPage(1); addSignature(sigObject, signatureInterface,defaultOptions); } /** * This will add a signature to the document. * * @param sigObject is the PDSignature model * @param signatureInterface is a interface which provides signing capabilities * @param options signature options * @throws IOException if there is an error creating required fields * @throws SignatureException if something went wrong */ public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface, SignatureOptions options) throws IOException, SignatureException { // Reserve content // We need to reserve some space for the signature. Some signatures including // big certificate chain and we need enough space to store it. 
int preferedSignatureSize = options.getPreferedSignatureSize(); if (preferedSignatureSize > 0) { sigObject.setContents(new byte[preferedSignatureSize]); } else { sigObject.setContents(new byte[0x2500]); } // Reserve ByteRange sigObject.setByteRange(new int[] {0,1000000000,1000000000,1000000000}); getDocument().setSignatureInterface(signatureInterface); // ######################################### // # Create SignatureForm for signature # // # and appending it to the document # // ######################################### // Get the first page PDDocumentCatalog root = getDocumentCatalog(); PDPageNode rootPages = root.getPages(); List kids = new ArrayList(); rootPages.getAllKids(kids); int size = (int)rootPages.getCount(); PDPage page; if (size == 0) { throw new SignatureException(SignatureException.INVALID_PAGE_FOR_SIGNATURE, "The PDF file has no pages"); } if (options.getPage()>size) { page = kids.get(size-1); } else if(options.getPage()<=0) { page = kids.get(0); } else { page = kids.get(options.getPage()-1); } // Get the AcroForm from the Root-Dictionary and append the annotation PDAcroForm acroForm = root.getAcroForm(); root.getCOSObject().setNeedToBeUpdate(true); if (acroForm==null) { acroForm = new PDAcroForm(this); root.setAcroForm(acroForm); } else { acroForm.getCOSObject().setNeedToBeUpdate(true); } /* * For invisible signatures, the annotation has a rectangle array with values [ 0 0 0 0 ]. * This annotation is usually attached to the viewed page when the signature is created. * Despite not having an appearance, the annotation AP and N dictionaries may be present * in some versions of Acrobat. If present, N references the DSBlankXObj (blank) XObject. 
*/ // Create Annotation / Field for signature List annotations = page.getAnnotations(); List fields = acroForm.getFields(); PDSignatureField signatureField = null; if(fields == null) { fields = new ArrayList(); acroForm.setFields(fields); } for ( PDField pdField : fields ) { if (pdField instanceof PDSignatureField) { PDSignature signature = ((PDSignatureField)pdField).getSignature(); if (signature != null && signature.getDictionary().equals(sigObject.getDictionary())) { signatureField = (PDSignatureField)pdField; } } } if (signatureField == null) { signatureField = new PDSignatureField(acroForm); signatureField.setSignature(sigObject); // append the signature object signatureField.getWidget().setPage(page); // backward linking } // Set the AcroForm Fields List acroFormFields = acroForm.getFields(); COSDictionary acroFormDict = acroForm.getDictionary(); acroFormDict.setDirect(true); acroFormDict.setInt(COSName.SIG_FLAGS, 3); boolean checkFields = false; for ( PDField field : acroFormFields ) { if (field instanceof PDSignatureField) { if (((PDSignatureField)field).getCOSObject().equals(signatureField.getCOSObject())) { checkFields = true; signatureField.getCOSObject().setNeedToBeUpdate(true); break; } } } if (!checkFields) { acroFormFields.add(signatureField); } // Get the object from the visual signature COSDocument visualSignature = options.getVisualSignature(); // Distinction of case for visual and non-visual signature if (visualSignature == null) // non-visual signature { // Set rectangle for non-visual signature to 0 0 0 0 signatureField.getWidget().setRectangle(new PDRectangle()); // rectangle array [ 0 0 0 0 ] // Set empty Appearance-Dictionary PDAppearanceDictionary ap = new PDAppearanceDictionary(); COSStream apsStream = getDocument().createCOSStream(); apsStream.createUnfilteredStream(); PDAppearanceStream aps = new PDAppearanceStream(apsStream); COSDictionary cosObject = (COSDictionary)aps.getCOSObject(); cosObject.setItem(COSName.SUBTYPE, COSName.FORM); 
cosObject.setItem(COSName.BBOX, new PDRectangle()); ap.setNormalAppearance(aps); ap.getDictionary().setDirect(true); signatureField.getWidget().setAppearance(ap); } else // visual signature { // Obtain visual signature object List cosObjects = visualSignature.getObjects(); boolean annotNotFound = true; boolean sigFieldNotFound = true; for ( COSObject cosObject : cosObjects ) { if (!annotNotFound && !sigFieldNotFound) { break; } COSBase base = cosObject.getObject(); if (base != null && base instanceof COSDictionary) { COSBase ft = ((COSDictionary)base).getDictionaryObject(COSName.FT); COSBase type = ((COSDictionary)base).getDictionaryObject(COSName.TYPE); COSBase apDict = ((COSDictionary)base).getDictionaryObject(COSName.AP); // Search for signature annotation if (annotNotFound && COSName.ANNOT.equals(type)) { COSDictionary cosBaseDict = (COSDictionary)base; // Read and set the Rectangle for visual signature COSArray rectAry = (COSArray)cosBaseDict.getDictionaryObject(COSName.RECT); PDRectangle rect = new PDRectangle(rectAry); signatureField.getWidget().setRectangle(rect); annotNotFound = false; } // Search for Signature-Field if (sigFieldNotFound && COSName.SIG.equals(ft) && apDict != null) { COSDictionary cosBaseDict = (COSDictionary)base; // read and set Appearance Dictionary PDAppearanceDictionary ap = new PDAppearanceDictionary((COSDictionary)cosBaseDict.getDictionaryObject(COSName.AP)); ap.getDictionary().setDirect(true); signatureField.getWidget().setAppearance(ap); // read and set AcroForm DefaultResource COSBase dr = cosBaseDict.getDictionaryObject(COSName.DR); if (dr != null) { dr.setDirect(true); dr.setNeedToBeUpdate(true); acroFormDict.setItem(COSName.DR, dr); } sigFieldNotFound=false; } } } if (annotNotFound || sigFieldNotFound ) { throw new SignatureException(SignatureException.VISUAL_SIGNATURE_INVALID, "Could not read all needed objects from template"); } } // Get the annotations of the page and append the signature-annotation to it if (annotations == 
null) { annotations = new COSArrayList(); } // Make /Annots a direct object to avoid problem if it is an existing indirect object: // it would not be updated in incremental save, and if we'd set the /Annots array "to be updated" // while keeping it indirect, Adobe Reader would claim that the document had been modified. page.setAnnotations(annotations); // take care that page and acroforms do not share the same array (if so, we don't need to add it twice) if (!((annotations instanceof COSArrayList) && (acroFormFields instanceof COSArrayList) && (((COSArrayList)annotations).toList().equals(((COSArrayList)acroFormFields).toList()))) && !checkFields) { annotations.add(signatureField.getWidget()); } page.getCOSObject().setNeedToBeUpdate(true); } /** * This will add a signaturefield to the document. * @param sigFields are the PDSignatureFields that should be added to the document * @param signatureInterface is a interface which provides signing capabilities * @param options signature options * @throws IOException if there is an error creating required fields * @throws SignatureException */ public void addSignatureField(List sigFields, SignatureInterface signatureInterface, SignatureOptions options) throws IOException, SignatureException { PDDocumentCatalog catalog = getDocumentCatalog(); catalog.getCOSObject().setNeedToBeUpdate(true); PDAcroForm acroForm = catalog.getAcroForm(); if (acroForm == null) { acroForm = new PDAcroForm(this); catalog.setAcroForm(acroForm); } else { acroForm.getCOSObject().setNeedToBeUpdate(true); } COSDictionary acroFormDict = acroForm.getDictionary(); acroFormDict.setDirect(true); acroFormDict.setNeedToBeUpdate(true); if (acroFormDict.getInt(COSName.SIG_FLAGS) < 1) { acroFormDict.setInt(COSName.SIG_FLAGS, 1); // 1 if at least one signature field is available } List field = acroForm.getFields(); for ( PDSignatureField sigField : sigFields ) { PDSignature sigObject = sigField.getSignature(); sigField.getCOSObject().setNeedToBeUpdate(true); // 
Check if the field already exist boolean checkFields = false; for ( Object obj : field ) { if (obj instanceof PDSignatureField) { if (((PDSignatureField)obj).getCOSObject().equals(sigField.getCOSObject())) { checkFields=true; sigField.getCOSObject().setNeedToBeUpdate(true); break; } } } if (!checkFields) { field.add(sigField); } // Check if we need to add a signature if (sigField.getSignature() != null) { sigField.getCOSObject().setNeedToBeUpdate(true); if (options == null) { } addSignature(sigField.getSignature(), signatureInterface, options); } } } /** * Remove the page from the document. * * @param page The page to remove from the document. * * @return true if the page was found false otherwise. */ public boolean removePage( PDPage page ) { PDPageNode parent = page.getParent(); boolean retval = parent.getKids().remove( page ); if( retval ) { //do a recursive updateCount starting at the root of the document getDocumentCatalog().getPages().updateCount(); } return retval; } /** * Remove the page from the document. * * @param pageNumber 0 based index to page number. * @return true if the page was found false otherwise. */ public boolean removePage( int pageNumber ) { boolean removed = false; List allPages = getDocumentCatalog().getAllPages(); if( allPages.size() > pageNumber) { PDPage page = (PDPage)allPages.get( pageNumber ); removed = removePage( page ); } return removed; } /** * This will import and copy the contents from another location. Currently * the content stream is stored in a scratch file. The scratch file is * associated with the document. If you are adding a page to this document * from another document and want to copy the contents to this document's * scratch file then use this method otherwise just use the addPage method. * * Unlike {@link #addPage}, this method does a deep copy. If your page has annotations, and if * these link to pages not in the target document, then the target document might become huge. 
* What you need to do is to delete page references of such annotations. See * here for how to do this. * * @param page The page to import. * @return The page that was imported. * * @throws IOException If there is an error copying the page. */ public PDPage importPage( PDPage page ) throws IOException { PDPage importedPage = new PDPage( new COSDictionary( page.getCOSDictionary() ) ); InputStream is = null; OutputStream os = null; try { PDStream src = page.getContents(); if(src != null) { PDStream dest = new PDStream( document.createCOSStream()); dest.addCompression(); importedPage.setContents( dest ); os = dest.createOutputStream(); byte[] buf = new byte[10240]; int amountRead; is = src.createInputStream(); while((amountRead = is.read(buf,0,10240)) > -1) { os.write(buf, 0, amountRead); } } addPage( importedPage ); } finally { if( is != null ) { is.close(); } if( os != null ) { os.close(); } } return importedPage; } /** * Constructor that uses an existing document. The COSDocument that * is passed in must be valid. * * @param doc The COSDocument that this document wraps. */ public PDDocument( COSDocument doc ) { this(doc, null); } /** * Constructor that uses an existing document. The COSDocument that is passed in must be valid. * * @param doc The COSDocument that this document wraps. * @param usedParser the parser which is used to read the pdf */ public PDDocument(COSDocument doc, BaseParser usedParser) { document = doc; parser = usedParser; } /** * This will get the low level document. * * @return The document that this layer sits on top of. */ public COSDocument getDocument() { return document; } /** * This will get the document info dictionary. This is guaranteed to not return null. 
* * @return The documents /Info dictionary */ public PDDocumentInformation getDocumentInformation() { if( documentInformation == null ) { COSDictionary trailer = document.getTrailer(); COSDictionary infoDic = (COSDictionary)trailer.getDictionaryObject( COSName.INFO ); if( infoDic == null ) { infoDic = new COSDictionary(); trailer.setItem( COSName.INFO, infoDic ); } documentInformation = new PDDocumentInformation( infoDic ); } return documentInformation; } /** * This will set the document information for this document. * * @param info The updated document information. */ public void setDocumentInformation( PDDocumentInformation info ) { documentInformation = info; document.getTrailer().setItem( COSName.INFO, info.getDictionary() ); } /** * This will get the document CATALOG. This is guaranteed to not return null. * * @return The documents /Root dictionary */ public PDDocumentCatalog getDocumentCatalog() { if( documentCatalog == null ) { COSDictionary trailer = document.getTrailer(); COSBase dictionary = trailer.getDictionaryObject( COSName.ROOT ); if (dictionary instanceof COSDictionary) { documentCatalog = new PDDocumentCatalog(this, (COSDictionary) dictionary); } else { documentCatalog = new PDDocumentCatalog(this); } } return documentCatalog; } /** * This will tell if this document is encrypted or not. * * @return true If this document is encrypted. */ public boolean isEncrypted() { return document.isEncrypted(); } /** * This will get the encryption dictionary for this document. This will still * return the parameters if the document was decrypted. If the document was * never encrypted then this will return null. As the encryption architecture * in PDF documents is plugable this returns an abstract class, but the only * supported subclass at this time is a PDStandardEncryption object. * * @return The encryption dictionary(most likely a PDStandardEncryption object) * * @throws IOException If there is an error determining which security handler to use. 
*/ public PDEncryptionDictionary getEncryptionDictionary() throws IOException { if( encParameters == null ) { if( isEncrypted() ) { encParameters = new PDEncryptionDictionary(document.getEncryptionDictionary()); } } return encParameters; } /** * This will set the encryption dictionary for this document. * * @param encDictionary The encryption dictionary(most likely a PDStandardEncryption object) * * @throws IOException If there is an error determining which security handler to use. */ public void setEncryptionDictionary( PDEncryptionDictionary encDictionary ) throws IOException { encParameters = encDictionary; } /** * This will return the last signature. * * @return the last signature as PDSignature. * @throws IOException if no document catalog can be found. * @deprecated use {@link #getLastSignatureDictionary()} instead. */ @Deprecated public PDSignature getSignatureDictionary() throws IOException { return getLastSignatureDictionary(); } /** * This will return the last signature. * * @return the last signature as PDSignature. * @throws IOException if no document catalog can be found. */ public PDSignature getLastSignatureDictionary() throws IOException { List signatureDictionaries = getSignatureDictionaries(); int size = signatureDictionaries.size(); if (size > 0) { return signatureDictionaries.get(size - 1); } return null; } /** * Retrieve all signature fields from the document. * * @return a List of PDSignatureFields * @throws IOException if no document catalog can be found. */ public List getSignatureFields() throws IOException { List fields = new LinkedList(); PDAcroForm acroForm = getDocumentCatalog().getAcroForm(); if (acroForm != null) { List signatureDictionary = document.getSignatureFields(false); for ( COSDictionary dict : signatureDictionary ) { fields.add(new PDSignatureField(acroForm, dict)); } } return fields; } /** * Retrieve all signature dictionaries from the document. 
* * @return a List of PDSignatures * @throws IOException if no document catalog can be found. */ public List getSignatureDictionaries() throws IOException { List signatureDictionary = document.getSignatureDictionaries(); List signatures = new LinkedList(); for ( COSDictionary dict : signatureDictionary ) { signatures.add(new PDSignature(dict)); } return signatures; } /** * This will determine if this is the user password. This only applies when * the document is encrypted and uses standard encryption. * * @param password The plain text user password. * * @return true If the password passed in matches the user password used to encrypt the document. * * @throws IOException If there is an error determining if it is the user password. * @throws CryptographyException If there is an error in the encryption algorithms. * * @deprecated */ @Deprecated public boolean isUserPassword( String password ) throws IOException, CryptographyException { return false; } /** * This will determine if this is the owner password. This only applies when * the document is encrypted and uses standard encryption. * * @param password The plain text owner password. * * @return true If the password passed in matches the owner password used to encrypt the document. * * @throws IOException If there is an error determining if it is the user password. * @throws CryptographyException If there is an error in the encryption algorithms. * * @deprecated */ @Deprecated public boolean isOwnerPassword( String password ) throws IOException, CryptographyException { return false; } /** * This will decrypt a document. This method is provided for compatibility reasons only. User * should use the new security layer instead and the openProtection method especially. *

* Do not call this method if you have opened your document with one of the * {@link #loadNonSeq(java.io.File, org.apache.pdfbox.io.RandomAccess) loadNonSeq} methods. * * @param password Either the user or owner password. * * @throws CryptographyException If there is an error decrypting the document. * @throws IOException If there is an error getting the stream data. * */ public void decrypt( String password ) throws CryptographyException, IOException { try { StandardDecryptionMaterial m = new StandardDecryptionMaterial(password); this.openProtection(m); } catch(BadSecurityHandlerException e) { throw new CryptographyException(e); } } /** * This will tell if the document was decrypted with the master password. This * entry is invalid if the PDF was not decrypted. * * @return true if the pdf was decrypted with the master password. * * @deprecated use getCurrentAccessPermission instead */ @Deprecated public boolean wasDecryptedWithOwnerPassword() { return false; } /** * This will mark a document to be encrypted. The actual encryption * will occur when the document is saved. * This method is provided for compatibility reasons only. User should use * the new security layer instead and the openProtection method especially. * * @param ownerPassword The owner password to encrypt the document. * @param userPassword The user password to encrypt the document. * * @throws CryptographyException If an error occurs during encryption. * @throws IOException If there is an error accessing the data. * */ public void encrypt( String ownerPassword, String userPassword ) throws CryptographyException, IOException { try { StandardProtectionPolicy policy = new StandardProtectionPolicy(ownerPassword, userPassword, new AccessPermission()); this.protect(policy); } catch(BadSecurityHandlerException e) { throw new CryptographyException(e); } } /** * The owner password that was passed into the encrypt method. You should * never use this method. 
This will not longer be valid once encryption * has occured. * * @return The owner password passed to the encrypt method. * * @deprecated Do not rely on this method anymore. */ @Deprecated public String getOwnerPasswordForEncryption() { return null; } /** * The user password that was passed into the encrypt method. You should * never use this method. This will not longer be valid once encryption * has occured. * * @return The user password passed to the encrypt method. * * @deprecated Do not rely on this method anymore. */ @Deprecated public String getUserPasswordForEncryption() { return null; } /** * Internal method do determine if the document will be encrypted when it is saved. * * @return True if encrypt has been called and the document * has not been saved yet. * * @deprecated Do not rely on this method anymore. It is the responsibility of * COSWriter to hold this state */ @Deprecated public boolean willEncryptWhenSaving() { return false; } /** * This shoule only be called by the COSWriter after encryption has completed. * * @deprecated Do not rely on this method anymore. It is the responsability of * COSWriter to hold this state. */ @Deprecated public void clearWillEncryptWhenSaving() { //method is deprecated. } /** * This will load a document from a url. * * @param url The url to load the PDF from. * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. */ public static PDDocument load( URL url ) throws IOException { return load( url.openStream() ); } /** * This will load a document from a url. Used for skipping corrupt * pdf objects * * @param url The url to load the PDF from. * @param force When true, the parser will skip corrupt pdf objects and * will continue parsing at the next object in the file * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. 
*/ public static PDDocument load(URL url, boolean force) throws IOException { return load(url.openStream(), force); } /** * This will load a document from a url. * * @param url The url to load the PDF from. * @param scratchFile A location to store temp PDFBox data for this document. * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. */ public static PDDocument load( URL url, RandomAccess scratchFile ) throws IOException { return load( url.openStream(), scratchFile ); } /** * This will load a document from a file. * * @param filename The name of the file to load. * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. */ public static PDDocument load( String filename ) throws IOException { return load( new FileInputStream( filename ) ); } /** * This will load a document from a file. Allows for skipping corrupt pdf * objects * * @param filename The name of the file to load. * @param force When true, the parser will skip corrupt pdf objects and * will continue parsing at the next object in the file * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. */ public static PDDocument load(String filename, boolean force) throws IOException { return load(new FileInputStream( filename ), force); } /** * This will load a document from a file. * * @param filename The name of the file to load. * @param scratchFile A location to store temp PDFBox data for this document. * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. */ public static PDDocument load( String filename, RandomAccess scratchFile ) throws IOException { return load( new FileInputStream( filename ), scratchFile ); } /** * This will load a document from a file. * * @param file The name of the file to load. * * @return The document that was loaded. 
* * @throws IOException If there is an error reading from the stream. */ public static PDDocument load( File file ) throws IOException { return load( new FileInputStream( file ) ); } /** * This will load a document from a file. * * @param file The name of the file to load. * @param scratchFile A location to store temp PDFBox data for this document. * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. */ public static PDDocument load( File file, RandomAccess scratchFile ) throws IOException { return load( new FileInputStream( file ), scratchFile ); } /** * This will load a document from an input stream. * * @param input The stream that contains the document. * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. */ public static PDDocument load( InputStream input ) throws IOException { return load( input, null ); } /** * This will load a document from an input stream. * Allows for skipping corrupt pdf objects * * @param input The stream that contains the document. * @param force When true, the parser will skip corrupt pdf objects and * will continue parsing at the next object in the file * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. */ public static PDDocument load(InputStream input, boolean force) throws IOException { return load(input, null, force); } /** * This will load a document from an input stream. * * @param input The stream that contains the document. * @param scratchFile A location to store temp PDFBox data for this document. * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. 
*/ public static PDDocument load( InputStream input, RandomAccess scratchFile ) throws IOException { PDFParser parser = new PDFParser(input, scratchFile); parser.parse(); return parser.getPDDocument(); } /** * This will load a document from an input stream. Allows for skipping corrupt pdf objects * * @param input The stream that contains the document. * @param scratchFile A location to store temp PDFBox data for this document. * @param force When true, the parser will skip corrupt pdf objects and * will continue parsing at the next object in the file * * @return The document that was loaded. * * @throws IOException If there is an error reading from the stream. */ public static PDDocument load(InputStream input, RandomAccess scratchFile, boolean force) throws IOException { PDFParser parser = new PDFParser(input, scratchFile, force); parser.parse(); return parser.getPDDocument(); } /** * Parses PDF with the new non sequential parser and an empty password. * * @param file file to be loaded * @param scratchFile location to store temp PDFBox data for this document * * @return loaded document * * @throws IOException in case of a file reading or parsing error */ public static PDDocument loadNonSeq( File file, RandomAccess scratchFile ) throws IOException { return loadNonSeq( file, scratchFile, "" ); } /** * Parses PDF with the new non sequential parser and an empty password. * * @param file file to be loaded * @param scratchFile location to store temp PDFBox data for this document * @param password password to be used for decryption * * @return loaded document * * @throws IOException in case of a file reading or parsing error */ public static PDDocument loadNonSeq( File file, RandomAccess scratchFile, String password ) throws IOException { NonSequentialPDFParser parser = new NonSequentialPDFParser( file, scratchFile, password ); parser.parse(); return parser.getPDDocument(); } /** * Parses PDF with the new non sequential parser. 
* * @param input stream that contains the document. * @param scratchFile location to store temp PDFBox data for this document * * @return loaded document * * @throws IOException in case of a file reading or parsing error */ public static PDDocument loadNonSeq( InputStream input, RandomAccess scratchFile) throws IOException { return loadNonSeq(input, scratchFile, ""); } /** * Parses PDF with the new non sequential parser. * * @param input stream that contains the document. * @param scratchFile location to store temp PDFBox data for this document * @param password password to be used for decryption * * @return loaded document * * @throws IOException in case of a file reading or parsing error */ public static PDDocument loadNonSeq( InputStream input, RandomAccess scratchFile, String password ) throws IOException { NonSequentialPDFParser parser = new NonSequentialPDFParser( input, scratchFile, password ); parser.parse(); return parser.getPDDocument(); } /** * Save the document to a file. * * @param fileName The file to save as. * * @throws IOException If there is an error saving the document. * @throws COSVisitorException If an error occurs while generating the data. */ public void save( String fileName ) throws IOException, COSVisitorException { save( new File( fileName ) ); } /** * Save the document to a file. * * @param file The file to save as. * * @throws IOException If there is an error saving the document. * @throws COSVisitorException If an error occurs while generating the data. */ public void save( File file ) throws IOException, COSVisitorException { save( new FileOutputStream( file ) ); } /** * This will save the document to an output stream. * * @param output The stream to write to. * * @throws IOException If there is an error writing the document. * @throws COSVisitorException If an error occurs while generating the data. 
*/ public void save( OutputStream output ) throws IOException, COSVisitorException { //update the count in case any pages have been added behind the scenes. getDocumentCatalog().getPages().updateCount(); COSWriter writer = null; try { writer = new COSWriter( output ); writer.write( this ); } finally { if( writer != null ) { writer.close(); } } } /** * Save the pdf as incremental. * * @param fileName the filename to be used * @throws IOException if something went wrong * @throws COSVisitorException if something went wrong */ public void saveIncremental( String fileName ) throws IOException, COSVisitorException { saveIncremental(new BufferedInputStream(new FileInputStream(fileName)), new BufferedOutputStream(new FileOutputStream(fileName, true))); } /** * Save the pdf as incremental. * * @param input * @param output * @throws IOException if something went wrong * @throws COSVisitorException if something went wrong */ public void saveIncremental(InputStream input, OutputStream output) throws IOException, COSVisitorException { //update the count in case any pages have been added behind the scenes. getDocumentCatalog().getPages().updateCount(); COSWriter writer = null; try { // Sometimes the original file will be missing a newline at the end // In order to avoid having %%EOF the first object on the same line // as the %%EOF, we put a newline here. If there's already one at // the end of the file, an extra one won't hurt. PDFBOX-1051 output.write("\r\n".getBytes("ISO-8859-1")); writer = new COSWriter( output, input ); writer.write( this ); } finally { if( writer != null ) { writer.close(); } } } /** * This will return the total page count of the PDF document. Note: This method * is deprecated in favor of the getNumberOfPages method. The getNumberOfPages is * a required interface method of the Pageable interface. This method will * be removed in a future version of PDFBox!! * * @return The total number of pages in the PDF document. 
* @deprecated Use the getNumberOfPages method instead! */ @Deprecated public int getPageCount() { return getNumberOfPages(); } /** * {@inheritDoc} */ public int getNumberOfPages() { PDDocumentCatalog cat = getDocumentCatalog(); return (int)cat.getPages().getCount(); } /** * Returns the format of the page at the given index when using a * default printer job returned by {@link PrinterJob#getPrinterJob()}. * * @deprecated Use the {@link PDPageable} adapter class * @param pageIndex page index, zero-based * @return page format */ @Deprecated public PageFormat getPageFormat(int pageIndex) { try { PrinterJob printerJob = PrinterJob.getPrinterJob(); return new PDPageable(this, printerJob).getPageFormat(pageIndex); } catch (PrinterException e) { throw new RuntimeException(e); } } /** * {@inheritDoc} */ public Printable getPrintable(int pageIndex) { return (Printable)getDocumentCatalog().getAllPages().get( pageIndex ); } /** * @see PDDocument#print() * * @param printJob The printer job. * * @throws PrinterException If there is an error while sending the PDF to * the printer, or you do not have permissions to print this document. */ public void print(PrinterJob printJob) throws PrinterException { print(printJob, false); } /** * This will send the PDF document to a printer. The printing functionality * depends on the org.apache.pdfbox.pdfviewer.PageDrawer functionality. The PageDrawer * is a work in progress and some PDFs will print correctly and some will * not. This is a convenience method to create the java.awt.print.PrinterJob. * The PDDocument implements the java.awt.print.Pageable interface and * PDPage implementes the java.awt.print.Printable interface, so advanced printing * capabilities can be done by using those interfaces instead of this method. * * @throws PrinterException If there is an error while sending the PDF to * the printer, or you do not have permissions to print this document. 
*/ public void print() throws PrinterException { print( PrinterJob.getPrinterJob() ); } /** * This will send the PDF to the default printer without prompting the user * for any printer settings. * * @see PDDocument#print() * * @throws PrinterException If there is an error while printing. */ public void silentPrint() throws PrinterException { silentPrint( PrinterJob.getPrinterJob() ); } /** * This will send the PDF to the default printer without prompting the user * for any printer settings. * * @param printJob A printer job definition. * @see PDDocument#print() * * @throws PrinterException If there is an error while printing. */ public void silentPrint( PrinterJob printJob ) throws PrinterException { print(printJob, true); } private void print(PrinterJob job, boolean silent) throws PrinterException { if (job == null) { throw new PrinterException("The given printer job is null."); } else { job.setPageable(new PDPageable(this, job)); if (silent || job.printDialog()) { job.print(); } } } /** * This will close the underlying COSDocument object. * * @throws IOException If there is an error releasing resources. */ public void close() throws IOException { documentCatalog = null; documentInformation = null; encParameters = null; if (pageMap != null) { pageMap.clear(); pageMap = null; } securityHandler = null; if (document != null) { document.close(); document = null; } if (parser != null) { parser.clearResources(); parser = null; } } /** * Protects the document with the protection policy pp. The document content will be really encrypted * when it will be saved. This method only marks the document for encryption. * * @see org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy * * @param pp The protection policy. * * @throws BadSecurityHandlerException If there is an error during protection. 
*/
    public void protect(ProtectionPolicy pp) throws BadSecurityHandlerException
    {
        // only the handler is remembered here; nothing else is touched in this method
        securityHandler = SecurityHandlersManager.getInstance().getSecurityHandler(pp);
    }

    /**
     * Tries to decrypt the document in memory using the provided decryption material.
     *

* Do not call this method if you have opened your document with one of the * {@link #loadNonSeq(java.io.File, org.apache.pdfbox.io.RandomAccess) loadNonSeq} methods. * * * @see org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial * * @param pm The decryption material (password or certificate). * * @throws BadSecurityHandlerException If there is an error during decryption. * @throws IOException If there is an error reading cryptographic information. * @throws CryptographyException If there is an error during decryption. */ public void openProtection(DecryptionMaterial pm) throws BadSecurityHandlerException, IOException, CryptographyException { PDEncryptionDictionary dict = this.getEncryptionDictionary(); if(dict.getFilter() != null) { securityHandler = SecurityHandlersManager.getInstance().getSecurityHandler(dict.getFilter()); securityHandler.decryptDocument(this, pm); document.dereferenceObjectStreams(); document.setEncryptionDictionary( null ); getDocumentCatalog(); } else { throw new RuntimeException("This document does not need to be decrypted"); } } /** * Returns the access permissions granted when the document was decrypted. * If the document was not decrypted this method returns the access permission * for a document owner (ie can do everything). * The returned object is in read only mode so that permissions cannot be changed. * Methods providing access to content should rely on this object to verify if the current * user is allowed to proceed. * * @return the access permissions for the current user on the document. 
*/
    public AccessPermission getCurrentAccessPermission()
    {
        if (this.securityHandler != null)
        {
            return securityHandler.getCurrentAccessPermission();
        }
        // no handler assigned: fall back to owner permissions, logging when the
        // document is still encrypted
        if (isEncrypted())
        {
            LOG.info("the document has not yet been decrypted, returning access permission for a document owner");
        }
        return AccessPermission.getOwnerAccessPermission();
    }

    /**
     * Get the security handler that is used for document encryption.
     *
     * @return The handler used to encrypt/decrypt the document.
     */
    public SecurityHandler getSecurityHandler()
    {
        return securityHandler;
    }

    /**
     * Sets security handler if none is set already.
     *
     * @param secHandler security handler to be assigned to document
     * @return {@code true} if security handler was set, {@code false}
     *         otherwise (a security handler was already set)
     */
    public boolean setSecurityHandler(SecurityHandler secHandler)
    {
        if ( securityHandler != null )
        {
            // an existing handler is never replaced
            return false;
        }
        securityHandler = secHandler;
        return true;
    }

    /**
     * Indicates if all security is removed or not when writing the pdf.
     * @return returns true if all security shall be removed otherwise false
     */
    public boolean isAllSecurityToBeRemoved()
    {
        return allSecurityToBeRemoved;
    }

    /**
     * Activates/Deactivates the removal of all security when writing the pdf.
     *
     * @param removeAllSecurity remove all security if set to true
     */
    public void setAllSecurityToBeRemoved(boolean removeAllSecurity)
    {
        allSecurityToBeRemoved = removeAllSecurity;
    }

    /**
     * Returns the document id currently stored in this object.
     *
     * @return the stored document id
     */
    public Long getDocumentId()
    {
        return documentId;
    }

    /**
     * Stores the given document id in this object.
     *
     * @param docId the document id to store
     */
    public void setDocumentId(Long docId)
    {
        documentId = docId;
    }
}