All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pdfbox.pdmodel.PDDocument Maven / Gradle / Ivy

Go to download

The Apache PDFBox library is an open source Java tool for working with PDF documents.

There is a newer version: 3.0.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel;

import java.awt.Point;
import java.awt.image.DataBuffer;
import java.awt.image.Raster;
import java.awt.image.WritableRaster;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdfwriter.COSWriter;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandlerFactory;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.ExternalSigningSupport;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SigningSupport;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;

/**
 * This is the in-memory representation of the PDF document.
 * The #close() method must be called once the document is no longer needed.
 * 
 * @author Ben Litchfield
 */
public class PDDocument implements Closeable
{
    /**
     * For signing: large reserve byte range used as placeholder in the saved PDF until the actual
     * length of the PDF is known. You'll need to fetch (with
     * {@link PDSignature#getByteRange()} ) and reassign this yourself (with
     * {@link PDSignature#setByteRange(int[])} ) only if you call
     * {@link #saveIncrementalForExternalSigning(java.io.OutputStream) saveIncrementalForExternalSigning()}
     * twice.
     */
    private static final int[] RESERVE_BYTE_RANGE = new int[] { 0, 1000000000, 1000000000, 1000000000 };

    private static final Log LOG = LogFactory.getLog(PDDocument.class);

    /**
     * avoid concurrency issues with PDDeviceRGB and deadlock in COSNumber/COSInteger
     */
    static
    {
    	try
        {
            WritableRaster raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, 1, 1, 3, new Point(0, 0));
            PDDeviceRGB.INSTANCE.toRGBImage(raster);
        }
        catch (IOException ex)
        {
            LOG.debug("voodoo error", ex);
        }

        try
        {
            COSNumber.get("0");
            COSNumber.get("1");
        }
        catch (IOException ex)
        {
            //
        }
    }
    
    private final COSDocument document;

    // cached values
    private PDDocumentInformation documentInformation;
    private PDDocumentCatalog documentCatalog;

    // the encryption will be cached here. When the document is decrypted then
    // the COSDocument will not have an "Encrypt" dictionary anymore and this object must be used
    private PDEncryption encryption;

    // holds a flag which tells us if we should remove all security from this documents.
    private boolean allSecurityToBeRemoved;

    // keep tracking customized documentId for the trailer. If null, a new id will be generated
    // this ID doesn't represent the actual documentId from the trailer
    private Long documentId;

    // the pdf to be read
    private final RandomAccessRead pdfSource;

    // the access permissions of the document
    private AccessPermission accessPermission;
    
    // fonts to subset before saving
    private final Set fontsToSubset = new HashSet();

    // fonts to close when closing document
    private final Set fontsToClose = new HashSet();

    // Signature interface
    private SignatureInterface signInterface;

    // helper class used to create external signature
    private SigningSupport signingSupport;

    // document-wide cached resources
    private ResourceCache resourceCache = new DefaultResourceCache();
    
    /**
     * Creates an empty PDF document.
     * You need to add at least one page for the document to be valid.
     */
    public PDDocument()
    {
        this(MemoryUsageSetting.setupMainMemoryOnly());
    }

    /**
     * Creates an empty PDF document.
     * You need to add at least one page for the document to be valid.
     *
     * @param memUsageSetting defines how memory is used for buffering PDF streams 
     */
    public PDDocument(MemoryUsageSetting memUsageSetting)
    {
        ScratchFile scratchFile = null;
        try
        {
            scratchFile = new ScratchFile(memUsageSetting);
        }
        catch (IOException ioe)
        {
            LOG.warn("Error initializing scratch file: " + ioe.getMessage() +
                     ". Fall back to main memory usage only.");
            try
            {
                scratchFile = new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly());
            }
            catch (IOException ioe2) {}
        }
        
        document = new COSDocument(scratchFile);
        pdfSource = null;

        // First we need a trailer
        COSDictionary trailer = new COSDictionary();
        document.setTrailer(trailer);

        // Next we need the root dictionary.
        COSDictionary rootDictionary = new COSDictionary();
        trailer.setItem(COSName.ROOT, rootDictionary);
        rootDictionary.setItem(COSName.TYPE, COSName.CATALOG);
        rootDictionary.setItem(COSName.VERSION, COSName.getPDFName("1.4"));

        // next we need the pages tree structure
        COSDictionary pages = new COSDictionary();
        rootDictionary.setItem(COSName.PAGES, pages);
        pages.setItem(COSName.TYPE, COSName.PAGES);
        COSArray kidsArray = new COSArray();
        pages.setItem(COSName.KIDS, kidsArray);
        pages.setItem(COSName.COUNT, COSInteger.ZERO);
    }

    /**
     * This will add a page to the document. This is a convenience method, that will add the page to the root of the
     * hierarchy and set the parent of the page to the root.
     * 
     * @param page The page to add to the document.
     */
    public void addPage(PDPage page)
    {
        getPages().add(page);
    }

    /**
     * Add parameters of signature to be created externally using default signature options. See
     * {@link #saveIncrementalForExternalSigning(OutputStream)} method description on external
     * signature creation scenario details.
     *
     * @param sigObject is the PDSignatureField model
     * @throws IOException if there is an error creating required fields
     */
    public void addSignature(PDSignature sigObject) throws IOException
    {
        addSignature(sigObject, new SignatureOptions());
    }

    /**
     * Add parameters of signature to be created externally. See
     * {@link #saveIncrementalForExternalSigning(OutputStream)} method description on external
     * signature creation scenario details.
     *
     * @param sigObject is the PDSignatureField model
     * @param options signature options
     * @throws IOException if there is an error creating required fields
     */
    public void addSignature(PDSignature sigObject, SignatureOptions options) throws IOException
    {
        addSignature(sigObject, null, options);
    }

    /**
     * Add a signature to be created using the instance of given interface.
     * 
     * @param sigObject is the PDSignatureField model
     * @param signatureInterface is an interface which provides signing capabilities
     * @throws IOException if there is an error creating required fields
     */
    public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface) throws IOException
    {
        addSignature(sigObject, signatureInterface, new SignatureOptions());
    }

    /**
     * This will add a signature to the document. If the 0-based page number in the options
     * parameter is smaller than 0 or larger than max, the nearest valid page number will be used
     * (i.e. 0 or max) and no exception will be thrown.
     *
     * @param sigObject is the PDSignatureField model
     * @param signatureInterface is an interface which provides signing capabilities
     * @param options signature options
     * @throws IOException if there is an error creating required fields
     */
    public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface,
                             SignatureOptions options) throws IOException
    {
        // Reserve content
        // We need to reserve some space for the signature. Some signatures including
        // big certificate chain and we need enough space to store it.
        int preferredSignatureSize = options.getPreferredSignatureSize();
        if (preferredSignatureSize > 0)
        {
            sigObject.setContents(new byte[preferredSignatureSize]);
        }
        else
        {
            sigObject.setContents(new byte[SignatureOptions.DEFAULT_SIGNATURE_SIZE]);
        }

        // Reserve ByteRange, will be overwritten in COSWriter
        sigObject.setByteRange(RESERVE_BYTE_RANGE);

        signInterface = signatureInterface;

        // Create SignatureForm for signature and append it to the document

        // Get the first valid page
        int pageCount = getNumberOfPages();
        if (pageCount == 0)
        {
            throw new IllegalStateException("Cannot sign an empty document");
        }

        int startIndex = Math.min(Math.max(options.getPage(), 0), pageCount - 1);
        PDPage page = getPage(startIndex);

        // Get the AcroForm from the Root-Dictionary and append the annotation
        PDDocumentCatalog catalog = getDocumentCatalog();
        PDAcroForm acroForm = catalog.getAcroForm();
        catalog.getCOSObject().setNeedToBeUpdated(true);

        if (acroForm == null)
        {
            acroForm = new PDAcroForm(this);
            catalog.setAcroForm(acroForm);
        }
        else
        {
            acroForm.getCOSObject().setNeedToBeUpdated(true);
        }

        PDSignatureField signatureField = null;
        if (!(acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS) instanceof COSArray))
        {
            acroForm.getCOSObject().setItem(COSName.FIELDS, new COSArray());
        }
        else
        {
            COSArray fieldArray = (COSArray) acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS);
            fieldArray.setNeedToBeUpdated(true);
            signatureField = findSignatureField(acroForm.getFieldIterator(), sigObject);
        }
        if (signatureField == null)
        {
            signatureField = new PDSignatureField(acroForm);
            // append the signature object
            signatureField.setValue(sigObject);
            // backward linking
            signatureField.getWidgets().get(0).setPage(page);
        }
        else
        {
            sigObject.getCOSObject().setNeedToBeUpdated(true);
        }

        // TODO This "overwrites" the settings of the original signature field which might not be intended by the user
        // better make it configurable (not all users need/want PDF/A but their own setting):

        // to conform PDF/A-1 requirement:
        // The /F key's Print flag bit shall be set to 1 and 
        // its Hidden, Invisible and NoView flag bits shall be set to 0
        signatureField.getWidgets().get(0).setPrinted(true);

        // Set the AcroForm Fields
        List acroFormFields = acroForm.getFields();
        acroForm.getCOSObject().setDirect(true);
        acroForm.setSignaturesExist(true);
        acroForm.setAppendOnly(true);

        boolean checkFields = checkSignatureField(acroForm.getFieldIterator(), signatureField);
        if (checkFields)
        {
            signatureField.getCOSObject().setNeedToBeUpdated(true);
        }
        else
        {
            acroFormFields.add(signatureField);
        }

        // Get the object from the visual signature
        COSDocument visualSignature = options.getVisualSignature();

        // Distinction of case for visual and non-visual signature
        if (visualSignature == null)
        {
            prepareNonVisibleSignature(signatureField);
            return;
        }

        prepareVisibleSignature(signatureField, acroForm, visualSignature);

        // Create Annotation / Field for signature
        List annotations = page.getAnnotations();

        // Make /Annots a direct object to avoid problem if it is an existing indirect object: 
        // it would not be updated in incremental save, and if we'd set the /Annots array "to be updated" 
        // while keeping it indirect, Adobe Reader would claim that the document had been modified.
        page.setAnnotations(annotations);

        // Get the annotations of the page and append the signature-annotation to it
        // take care that page and acroforms do not share the same array (if so, we don't need to add it twice)
        if (!(annotations instanceof COSArrayList &&
              acroFormFields instanceof COSArrayList &&
              ((COSArrayList) annotations).toList().equals(((COSArrayList) acroFormFields).toList()) &&
              checkFields))
        {
            PDAnnotationWidget widget = signatureField.getWidgets().get(0);
            // use check to prevent the annotation widget from appearing twice
            if (checkSignatureAnnotation(annotations, widget))
            {
                widget.getCOSObject().setNeedToBeUpdated(true);
            }
            else
            {
                annotations.add(widget);
            }   
        }
        page.getCOSObject().setNeedToBeUpdated(true);
    }

    /**
     * Search acroform fields for signature field with specific signature dictionary.
     * 
     * @param fieldIterator iterator on all fields.
     * @param sigObject signature object (the /V part).
     * @return a signature field if found, or null if none was found.
     */
    private PDSignatureField findSignatureField(Iterator fieldIterator, PDSignature sigObject)
    {
        PDSignatureField signatureField = null;
        while (fieldIterator.hasNext())
        {
            PDField pdField = fieldIterator.next();
            if (pdField instanceof PDSignatureField)
            {
                PDSignature signature = ((PDSignatureField) pdField).getSignature();
                if (signature != null && signature.getCOSObject().equals(sigObject.getCOSObject()))
                {
                    signatureField = (PDSignatureField) pdField;
                }
            }
        }
        return signatureField;
    }

    /**
     * Check if the field already exists in the field list.
     *
     * @param fieldIterator iterator on all fields.
     * @param signatureField the signature field.
     * @return true if the field already existed in the field list, false if not.
     */
    private boolean checkSignatureField(Iterator fieldIterator, PDSignatureField signatureField)
    {
        while (fieldIterator.hasNext())
        {
            PDField field = fieldIterator.next();
            if (field instanceof PDSignatureField
                    && field.getCOSObject().equals(signatureField.getCOSObject()))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Check if the widget already exists in the annotation list
     *
     * @param acroFormFields the list of AcroForm fields.
     * @param signatureField the signature field.
     * @return true if the widget already existed in the annotation list, false if not.
     */
    private boolean checkSignatureAnnotation(List annotations, PDAnnotationWidget widget)
    {
        for (PDAnnotation annotation : annotations)
        {
            if (annotation.getCOSObject().equals(widget.getCOSObject()))
            {
                return true;
            }
        }
        return false;
    }

    private void prepareVisibleSignature(PDSignatureField signatureField, PDAcroForm acroForm, 
            COSDocument visualSignature)
    {
        // Obtain visual signature object
        boolean annotNotFound = true;
        boolean sigFieldNotFound = true;
        for (COSObject cosObject : visualSignature.getObjects())
        {
            if (!annotNotFound && !sigFieldNotFound)
            {
                break;
            }
            
            COSBase base = cosObject.getObject();
            if (base instanceof COSDictionary)
            {
                COSDictionary cosBaseDict = (COSDictionary) base;

                // Search for signature annotation
                COSBase type = cosBaseDict.getDictionaryObject(COSName.TYPE);
                if (annotNotFound && COSName.ANNOT.equals(type))
                {
                    assignSignatureRectangle(signatureField, cosBaseDict);
                    annotNotFound = false;
                }

                // Search for signature field
                COSBase fieldType = cosBaseDict.getDictionaryObject(COSName.FT);
                COSBase apDict = cosBaseDict.getDictionaryObject(COSName.AP);
                if (sigFieldNotFound && COSName.SIG.equals(fieldType) && apDict instanceof COSDictionary)
                {
                    assignAppearanceDictionary(signatureField, (COSDictionary) apDict);
                    assignAcroFormDefaultResource(acroForm, cosBaseDict);
                    sigFieldNotFound = false;
                }
            }
        }
        
        if (annotNotFound || sigFieldNotFound)
        {
            throw new IllegalArgumentException("Template is missing required objects");
        }
    }

    private void assignSignatureRectangle(PDSignatureField signatureField, COSDictionary annotDict)
    {
        // Read and set the rectangle for visual signature
        COSArray rectArray = (COSArray) annotDict.getDictionaryObject(COSName.RECT);
        PDRectangle rect = new PDRectangle(rectArray);
        PDRectangle existingRectangle = signatureField.getWidgets().get(0).getRectangle();

        //in case of an existing field keep the original rect
        if (existingRectangle == null || existingRectangle.getCOSArray().size() != 4)
        {
            signatureField.getWidgets().get(0).setRectangle(rect);
        }
    }

    private void assignAppearanceDictionary(PDSignatureField signatureField, COSDictionary apDict)
    {
        // read and set Appearance Dictionary
        PDAppearanceDictionary ap = new PDAppearanceDictionary(apDict);
        apDict.setDirect(true);
        signatureField.getWidgets().get(0).setAppearance(ap);
    }

    private void assignAcroFormDefaultResource(PDAcroForm acroForm, COSDictionary newDict)
    {
        // read and set/update AcroForm default resource dictionary /DR if available
        COSBase newBase = newDict.getDictionaryObject(COSName.DR);
        if (newBase instanceof COSDictionary)
        {
            COSDictionary newDR = (COSDictionary) newBase;
            PDResources defaultResources = acroForm.getDefaultResources();
            if (defaultResources == null)
            {
                acroForm.getCOSObject().setItem(COSName.DR, newDR);
                newDR.setDirect(true);
                newDR.setNeedToBeUpdated(true);            
            }
            else
            {
                COSDictionary oldDR = defaultResources.getCOSObject();
                COSBase newXObjectBase = newDR.getItem(COSName.XOBJECT);
                COSBase oldXObjectBase = oldDR.getItem(COSName.XOBJECT);
                if (newXObjectBase instanceof COSDictionary &&
                    oldXObjectBase instanceof COSDictionary)
                {
                    ((COSDictionary) oldXObjectBase).addAll((COSDictionary) newXObjectBase);
                    oldDR.setNeedToBeUpdated(true);
                }
            }
        }
    }

    private void prepareNonVisibleSignature(PDSignatureField signatureField)
            throws IOException
    {
        // "Signature fields that are not intended to be visible shall
        // have an annotation rectangle that has zero height and width."
        // Set rectangle for non-visual signature to rectangle array [ 0 0 0 0 ]
        signatureField.getWidgets().get(0).setRectangle(new PDRectangle());
    }

    /**
     * This will add a list of signature fields to the document.
     * 
     * @param sigFields are the PDSignatureFields that should be added to the document
     * @param signatureInterface is a interface which provides signing capabilities
     * @param options signature options
     * @throws IOException if there is an error creating required fields
     */
    public void addSignatureField(List sigFields, SignatureInterface signatureInterface,
            SignatureOptions options) throws IOException
    {
        PDDocumentCatalog catalog = getDocumentCatalog();
        catalog.getCOSObject().setNeedToBeUpdated(true);

        PDAcroForm acroForm = catalog.getAcroForm();
        if (acroForm == null)
        {
            acroForm = new PDAcroForm(this);
            catalog.setAcroForm(acroForm);
        }
        COSDictionary acroFormDict = acroForm.getCOSObject();
        acroFormDict.setDirect(true);
        acroFormDict.setNeedToBeUpdated(true);
        if (!acroForm.isSignaturesExist())
        {
            // 1 if at least one signature field is available
            acroForm.setSignaturesExist(true); 
        }

        List acroformFields = acroForm.getFields();

        for (PDSignatureField sigField : sigFields)
        {
            sigField.getCOSObject().setNeedToBeUpdated(true);
            
            // Check if the field already exists
            boolean checkSignatureField = checkSignatureField(acroForm.getFieldIterator(), sigField);
            if (checkSignatureField)
            {
                sigField.getCOSObject().setNeedToBeUpdated(true);
            }
            else
            {
                acroformFields.add(sigField);
            }

            // Check if we need to add a signature
            if (sigField.getSignature() != null)
            {
                sigField.getCOSObject().setNeedToBeUpdated(true);
                if (options == null)
                {
                    // TODO ??
                }
                addSignature(sigField.getSignature(), signatureInterface, options);
            }
        }
    }

    /**
     * Remove the page from the document.
     * 
     * @param page The page to remove from the document.
     */
    public void removePage(PDPage page)
    {
        getPages().remove(page);
    }

    /**
     * Remove the page from the document.
     * 
     * @param pageNumber 0 based index to page number.
     */
    public void removePage(int pageNumber)
    {
        getPages().remove(pageNumber);
    }

    /**
     * This will import and copy the contents from another location. Currently the content stream is stored in a scratch
     * file. The scratch file is associated with the document. If you are adding a page to this document from another
     * document and want to copy the contents to this
     * document's scratch file then use this method otherwise just use the {@link #addPage addPage}
     * method.
     * 

* Unlike {@link #addPage addPage}, this method creates a new PDPage object. If your page has * annotations, and if these link to pages not in the target document, then the target document * might become huge. What you need to do is to delete page references of such annotations. See * here for how to do this. *

* Inherited (global) resources are ignored. If you need them, call * importedPage.setRotation(page.getRotation()); * * @param page The page to import. * @return The page that was imported. * * @throws IOException If there is an error copying the page. */ public PDPage importPage(PDPage page) throws IOException { PDPage importedPage = new PDPage(new COSDictionary(page.getCOSObject()), resourceCache); PDStream dest = new PDStream(this, page.getContents(), COSName.FLATE_DECODE); importedPage.setContents(dest); addPage(importedPage); importedPage.setCropBox(page.getCropBox()); importedPage.setMediaBox(page.getMediaBox()); importedPage.setRotation(page.getRotation()); if (page.getResources() != null && !page.getCOSObject().containsKey(COSName.RESOURCES)) { LOG.warn("inherited resources of source document are not imported to destination page"); LOG.warn("call importedPage.setResources(page.getResources()) to do this"); } return importedPage; } /** * Constructor that uses an existing document. The COSDocument that is passed in must be valid. * * @param doc The COSDocument that this document wraps. */ public PDDocument(COSDocument doc) { this(doc, null); } /** * Constructor that uses an existing document. The COSDocument that is passed in must be valid. * * @param doc The COSDocument that this document wraps. * @param source the parser which is used to read the pdf */ public PDDocument(COSDocument doc, RandomAccessRead source) { this(doc, source, null); } /** * Constructor that uses an existing document. The COSDocument that is passed in must be valid. * * @param doc The COSDocument that this document wraps. * @param source the parser which is used to read the pdf * @param permission he access permissions of the pdf * */ public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission permission) { document = doc; pdfSource = source; accessPermission = permission; } /** * This will get the low level document. * * @return The document that this layer sits on top of. */ public COSDocument getDocument() { return document; } /** * This will get the document info dictionary. This is guaranteed to not return null. * * @return The documents /Info dictionary */ public PDDocumentInformation getDocumentInformation() { if (documentInformation == null) { COSDictionary trailer = document.getTrailer(); COSDictionary infoDic = (COSDictionary) trailer.getDictionaryObject(COSName.INFO); if (infoDic == null) { infoDic = new COSDictionary(); trailer.setItem(COSName.INFO, infoDic); } documentInformation = new PDDocumentInformation(infoDic); } return documentInformation; } /** * This will set the document information for this document. * * @param info The updated document information. */ public void setDocumentInformation(PDDocumentInformation info) { documentInformation = info; document.getTrailer().setItem(COSName.INFO, info.getCOSObject()); } /** * This will get the document CATALOG. This is guaranteed to not return null. * * @return The documents /Root dictionary */ public PDDocumentCatalog getDocumentCatalog() { if (documentCatalog == null) { COSDictionary trailer = document.getTrailer(); COSBase dictionary = trailer.getDictionaryObject(COSName.ROOT); if (dictionary instanceof COSDictionary) { documentCatalog = new PDDocumentCatalog(this, (COSDictionary) dictionary); } else { documentCatalog = new PDDocumentCatalog(this); } } return documentCatalog; } /** * This will tell if this document is encrypted or not. * * @return true If this document is encrypted. */ public boolean isEncrypted() { return document.isEncrypted(); } /** * This will get the encryption dictionary for this document. This will still return the parameters if the document * was decrypted. As the encryption architecture in PDF documents is plugable this returns an abstract class, * but the only supported subclass at this time is a * PDStandardEncryption object. * * @return The encryption dictionary(most likely a PDStandardEncryption object) */ public PDEncryption getEncryption() { if (encryption == null && isEncrypted()) { encryption = new PDEncryption(document.getEncryptionDictionary()); } return encryption; } /** * This will set the encryption dictionary for this document. * * @param encryption The encryption dictionary(most likely a PDStandardEncryption object) * * @throws IOException If there is an error determining which security handler to use. */ public void setEncryptionDictionary(PDEncryption encryption) throws IOException { this.encryption = encryption; } /** * This will return the last signature from the field tree. Note that this may not be the * last in time when empty signature fields are created first but signed after other fields. * * @return the last signature as PDSignatureField. * @throws IOException if no document catalog can be found. */ public PDSignature getLastSignatureDictionary() throws IOException { List signatureDictionaries = getSignatureDictionaries(); int size = signatureDictionaries.size(); if (size > 0) { return signatureDictionaries.get(size - 1); } return null; } /** * Retrieve all signature fields from the document. * * @return a List of PDSignatureFields * @throws IOException if no document catalog can be found. */ public List getSignatureFields() throws IOException { List fields = new ArrayList(); PDAcroForm acroForm = getDocumentCatalog().getAcroForm(); if (acroForm != null) { for (PDField field : acroForm.getFieldTree()) { if (field instanceof PDSignatureField) { fields.add((PDSignatureField)field); } } } return fields; } /** * Retrieve all signature dictionaries from the document. * * @return a List of PDSignatureFields * @throws IOException if no document catalog can be found. */ public List getSignatureDictionaries() throws IOException { List signatures = new ArrayList(); for (PDSignatureField field : getSignatureFields()) { COSBase value = field.getCOSObject().getDictionaryObject(COSName.V); if (value != null) { signatures.add(new PDSignature((COSDictionary)value)); } } return signatures; } /** * For internal PDFBox use when creating PDF documents: register a TrueTypeFont to make sure it * is closed when the PDDocument is closed to avoid memory leaks. Users don't have to call this * method, it is done by the appropriate PDFont classes. * * @param ttf */ public void registerTrueTypeFontForClosing(TrueTypeFont ttf) { fontsToClose.add(ttf); } /** * Returns the list of fonts which will be subset before the document is saved. */ Set getFontsToSubset() { return fontsToSubset; } /** * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. * * @param file file to be loaded * * @return loaded document * * @throws InvalidPasswordException If the file required a non-empty password. * @throws IOException in case of a file reading or parsing error */ public static PDDocument load(File file) throws InvalidPasswordException, IOException { return load(file, "", MemoryUsageSetting.setupMainMemoryOnly()); } /** * Parses a PDF. * * @param file file to be loaded * @param memUsageSetting defines how memory is used for buffering PDF streams * * @return loaded document * * @throws InvalidPasswordException If the file required a non-empty password. * @throws IOException in case of a file reading or parsing error */ public static PDDocument load(File file, MemoryUsageSetting memUsageSetting) throws InvalidPasswordException, IOException { return load(file, "", null, null, memUsageSetting); } /** * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. * * @param file file to be loaded * @param password password to be used for decryption * * @return loaded document * * @throws InvalidPasswordException If the password is incorrect. * @throws IOException in case of a file reading or parsing error */ public static PDDocument load(File file, String password) throws InvalidPasswordException, IOException { return load(file, password, null, null, MemoryUsageSetting.setupMainMemoryOnly()); } /** * Parses a PDF. * * @param file file to be loaded * @param password password to be used for decryption * @param memUsageSetting defines how memory is used for buffering PDF streams * * @return loaded document * * @throws InvalidPasswordException If the password is incorrect. * @throws IOException in case of a file reading or parsing error */ public static PDDocument load(File file, String password, MemoryUsageSetting memUsageSetting) throws InvalidPasswordException, IOException { return load(file, password, null, null, memUsageSetting); } /** * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. * * @param file file to be loaded * @param password password to be used for decryption * @param keyStore key store to be used for decryption when using public key security * @param alias alias to be used for decryption when using public key security * * @return loaded document * * @throws IOException in case of a file reading or parsing error */ public static PDDocument load(File file, String password, InputStream keyStore, String alias) throws IOException { return load(file, password, keyStore, alias, MemoryUsageSetting.setupMainMemoryOnly()); } /** * Parses a PDF. * * @param file file to be loaded * @param password password to be used for decryption * @param keyStore key store to be used for decryption when using public key security * @param alias alias to be used for decryption when using public key security * @param memUsageSetting defines how memory is used for buffering PDF streams * * @return loaded document * * @throws IOException in case of a file reading or parsing error */ public static PDDocument load(File file, String password, InputStream keyStore, String alias, MemoryUsageSetting memUsageSetting) throws IOException { RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(file); try { ScratchFile scratchFile = new ScratchFile(memUsageSetting); try { PDFParser parser = new PDFParser(raFile, password, keyStore, alias, scratchFile); parser.parse(); return parser.getPDDocument(); } catch (IOException ioe) { IOUtils.closeQuietly(scratchFile); throw ioe; } } catch (IOException ioe) { IOUtils.closeQuietly(raFile); throw ioe; } } /** * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. * Unrestricted main memory will be used for buffering PDF streams. * * @param input stream that contains the document. * * @return loaded document * * @throws InvalidPasswordException If the PDF required a non-empty password. * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input) throws InvalidPasswordException, IOException { return load(input, "", null, null, MemoryUsageSetting.setupMainMemoryOnly()); } /** * Parses a PDF. Depending on the memory settings parameter the given input * stream is either copied to main memory or to a temporary file to enable * random access to the pdf. * * @param input stream that contains the document. * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams * * @return loaded document * * @throws InvalidPasswordException If the PDF required a non-empty password. * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input, MemoryUsageSetting memUsageSetting) throws InvalidPasswordException, IOException { return load(input, "", null, null, memUsageSetting); } /** * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. * Unrestricted main memory will be used for buffering PDF streams. * * @param input stream that contains the document. * @param password password to be used for decryption * * @return loaded document * * @throws InvalidPasswordException If the password is incorrect. * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input, String password) throws InvalidPasswordException, IOException { return load(input, password, null, null, MemoryUsageSetting.setupMainMemoryOnly()); } /** * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. * Unrestricted main memory will be used for buffering PDF streams. * * @param input stream that contains the document. * @param password password to be used for decryption * @param keyStore key store to be used for decryption when using public key security * @param alias alias to be used for decryption when using public key security * * @return loaded document * * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias) throws IOException { return load(input, password, keyStore, alias, MemoryUsageSetting.setupMainMemoryOnly()); } /** * Parses a PDF. Depending on the memory settings parameter the given input * stream is either copied to main memory or to a temporary file to enable * random access to the pdf. * * @param input stream that contains the document. * @param password password to be used for decryption * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams * * @return loaded document * * @throws InvalidPasswordException If the password is incorrect. * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input, String password, MemoryUsageSetting memUsageSetting) throws InvalidPasswordException, IOException { return load(input, password, null, null, memUsageSetting); } /** * Parses a PDF. Depending on the memory settings parameter the given input * stream is either copied to memory or to a temporary file to enable * random access to the pdf. * * @param input stream that contains the document. * @param password password to be used for decryption * @param keyStore key store to be used for decryption when using public key security * @param alias alias to be used for decryption when using public key security * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams * * @return loaded document * * @throws InvalidPasswordException If the password is incorrect. * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias, MemoryUsageSetting memUsageSetting) throws IOException { ScratchFile scratchFile = new ScratchFile(memUsageSetting); try { RandomAccessRead source = scratchFile.createBuffer(input); PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile); parser.parse(); return parser.getPDDocument(); } catch (IOException ioe) { IOUtils.closeQuietly(scratchFile); throw ioe; } } /** * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. * * @param input byte array that contains the document. * * @return loaded document * * @throws InvalidPasswordException If the PDF required a non-empty password. * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(byte[] input) throws InvalidPasswordException, IOException { return load(input, ""); } /** * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. * * @param input byte array that contains the document. * @param password password to be used for decryption * * @return loaded document * * @throws InvalidPasswordException If the password is incorrect. * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(byte[] input, String password) throws InvalidPasswordException, IOException { return load(input, password, null, null); } /** * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. * * @param input byte array that contains the document. * @param password password to be used for decryption * @param keyStore key store to be used for decryption when using public key security * @param alias alias to be used for decryption when using public key security * * @return loaded document * * @throws InvalidPasswordException If the password is incorrect. * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(byte[] input, String password, InputStream keyStore, String alias) throws IOException { return load(input, password, keyStore, alias, MemoryUsageSetting.setupMainMemoryOnly()); } /** * Parses a PDF. * * @param input byte array that contains the document. * @param password password to be used for decryption * @param keyStore key store to be used for decryption when using public key security * @param alias alias to be used for decryption when using public key security * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams * * @return loaded document * * @throws InvalidPasswordException If the password is incorrect. * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(byte[] input, String password, InputStream keyStore, String alias, MemoryUsageSetting memUsageSetting) throws IOException { ScratchFile scratchFile = new ScratchFile(memUsageSetting); RandomAccessRead source = new RandomAccessBuffer(input); PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile); parser.parse(); return parser.getPDDocument(); } /** * Save the document to a file. * * @param fileName The file to save as. * * @throws IOException if the output could not be written */ public void save(String fileName) throws IOException { save(new File(fileName)); } /** * Save the document to a file. * * @param file The file to save as. * * @throws IOException if the output could not be written */ public void save(File file) throws IOException { save(new BufferedOutputStream(new FileOutputStream(file))); } /** * This will save the document to an output stream. * * @param output The stream to write to. It will be closed when done. It is recommended to wrap * it in a {@link java.io.BufferedOutputStream}, unless it is already buffered. * * @throws IOException if the output could not be written */ public void save(OutputStream output) throws IOException { if (document.isClosed()) { throw new IOException("Cannot save a document which has been closed"); } // subset designated fonts for (PDFont font : fontsToSubset) { font.subset(); } fontsToSubset.clear(); // save PDF COSWriter writer = new COSWriter(output); try { writer.write(this); } finally { writer.close(); } } /** * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a * file or a stream, not if the document was created in PDFBox itself. * * @param output stream to write to. It will be closed when done. It should not * point to the source file. * @throws IOException if the output could not be written * @throws IllegalStateException if the document was not loaded from a file or a stream. */ public void saveIncremental(OutputStream output) throws IOException { COSWriter writer = null; try { if (pdfSource == null) { throw new IllegalStateException("document was not loaded from a file or a stream"); } writer = new COSWriter(output, pdfSource); writer.write(this, signInterface); } finally { if (writer != null) { writer.close(); } } } /** *

* (This is a new feature for 2.0.3. The API for external signing might change based on feedback after release!) *

* Save PDF incrementally without closing for external signature creation scenario. The general * sequence is: *

     *    PDDocument pdDocument = ...;
     *    OutputStream outputStream = ...;
     *    SignatureOptions signatureOptions = ...; // options to specify fine tuned signature options or null for defaults
     *    PDSignature pdSignature = ...;
     *
     *    // add signature parameters to be used when creating signature dictionary
     *    pdDocument.addSignature(pdSignature, signatureOptions);
     *    // prepare PDF for signing and obtain helper class to be used
     *    ExternalSigningSupport externalSigningSupport = pdDocument.saveIncrementalForExternalSigning(outputStream);
     *    // get data to be signed
     *    InputStream dataToBeSigned = externalSigningSupport.getContent();
     *    // invoke signature service
     *    byte[] signature = sign(dataToBeSigned);
     *    // set resulted CMS signature
     *    externalSigningSupport.setSignature(signature);
     *
     *    // last step is to close the document
     *    pdDocument.close();
     * 
*

* Note that after calling this method, only {@code close()} method may invoked for * {@code PDDocument} instance and only AFTER {@link ExternalSigningSupport} instance is used. *

* * @param output stream to write the final PDF. It should not point to the source * file. It will be closed when the document is closed. * @return instance to be used for external signing and setting CMS signature * @throws IOException if the output could not be written * @throws IllegalStateException if the document was not loaded from a file or a stream or * signature options were not set. */ public ExternalSigningSupport saveIncrementalForExternalSigning(OutputStream output) throws IOException { if (pdfSource == null) { throw new IllegalStateException("document was not loaded from a file or a stream"); } // PDFBOX-3978: getLastSignatureDictionary() not helpful if signing into a template // that is not the last signature. So give higher priority to signature with update flag. PDSignature foundSignature = null; for (PDSignature sig : getSignatureDictionaries()) { foundSignature = sig; if (sig.getCOSObject().isNeedToBeUpdated()) { break; } } int[] byteRange = foundSignature.getByteRange(); if (!Arrays.equals(byteRange, RESERVE_BYTE_RANGE)) { throw new IllegalStateException("signature reserve byte range has been changed " + "after addSignature(), please set the byte range that existed after addSignature()"); } COSWriter writer = new COSWriter(output, pdfSource); writer.write(this); signingSupport = new SigningSupport(writer); return signingSupport; } /** * Returns the page at the given 0-based index. *

* This method is too slow to get all the pages from a large PDF document * (1000 pages or more). For such documents, use the iterator of * {@link PDDocument#getPages()} instead. * * @param pageIndex the 0-based page index * @return the page at the given index. */ public PDPage getPage(int pageIndex) // todo: REPLACE most calls to this method with BELOW method { return getDocumentCatalog().getPages().get(pageIndex); } /** * Returns the page tree. * * @return the page tree */ public PDPageTree getPages() { return getDocumentCatalog().getPages(); } /** * This will return the total page count of the PDF document. * * @return The total number of pages in the PDF document. */ public int getNumberOfPages() { return getDocumentCatalog().getPages().getCount(); } /** * This will close the underlying COSDocument object. * * @throws IOException If there is an error releasing resources. */ @Override public void close() throws IOException { if (!document.isClosed()) { // Make sure that: // - first Exception is kept // - all IO resources are closed // - there's a way to see which errors occured IOException firstException = null; // close resources and COSWriter if (signingSupport != null) { firstException = IOUtils.closeAndLogException(signingSupport, LOG, "SigningSupport", firstException); } // close all intermediate I/O streams firstException = IOUtils.closeAndLogException(document, LOG, "COSDocument", firstException); // close the source PDF stream, if we read from one if (pdfSource != null) { firstException = IOUtils.closeAndLogException(pdfSource, LOG, "RandomAccessRead pdfSource", firstException); } // close fonts for (TrueTypeFont ttf : fontsToClose) { firstException = IOUtils.closeAndLogException(ttf, LOG, "TrueTypeFont", firstException); } // rethrow first exception to keep method contract if (firstException != null) { throw firstException; } } } /** * Protects the document with a protection policy. The document content will be really * encrypted when it will be saved. This method only marks the document for encryption. It also * calls {@link #setAllSecurityToBeRemoved(boolean)} with a false argument if it was set to true * previously and logs a warning. * * @see org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy * * @param policy The protection policy. * @throws IOException if there isn't any suitable security handler. */ public void protect(ProtectionPolicy policy) throws IOException { if (isAllSecurityToBeRemoved()) { LOG.warn("do not call setAllSecurityToBeRemoved(true) before calling protect(), " + "as protect() implies setAllSecurityToBeRemoved(false)"); setAllSecurityToBeRemoved(false); } if (!isEncrypted()) { encryption = new PDEncryption(); } SecurityHandler securityHandler = SecurityHandlerFactory.INSTANCE.newSecurityHandlerForPolicy(policy); if (securityHandler == null) { throw new IOException("No security handler for policy " + policy); } getEncryption().setSecurityHandler(securityHandler); } /** * Returns the access permissions granted when the document was decrypted. If the document was not decrypted this * method returns the access permission for a document owner (ie can do everything). The returned object is in read * only mode so that permissions cannot be changed. Methods providing access to content should rely on this object * to verify if the current user is allowed to proceed. * * @return the access permissions for the current user on the document. */ public AccessPermission getCurrentAccessPermission() { if (accessPermission == null) { accessPermission = AccessPermission.getOwnerAccessPermission(); } return accessPermission; } /** * Indicates if all security is removed or not when writing the pdf. * * @return returns true if all security shall be removed otherwise false */ public boolean isAllSecurityToBeRemoved() { return allSecurityToBeRemoved; } /** * Activates/Deactivates the removal of all security when writing the pdf. * * @param removeAllSecurity remove all security if set to true */ public void setAllSecurityToBeRemoved(boolean removeAllSecurity) { allSecurityToBeRemoved = removeAllSecurity; } /** * Provides the document ID. * * @return the dcoument ID */ public Long getDocumentId() { return documentId; } /** * Sets the document ID to the given value. * * @param docId the new document ID */ public void setDocumentId(Long docId) { documentId = docId; } /** * Returns the PDF specification version this document conforms to. * * @return the PDF version (e.g. 1.4f) */ public float getVersion() { float headerVersionFloat = getDocument().getVersion(); // there may be a second version information in the document catalog starting with 1.4 if (headerVersionFloat >= 1.4f) { String catalogVersion = getDocumentCatalog().getVersion(); float catalogVersionFloat = -1; if (catalogVersion != null) { try { catalogVersionFloat = Float.parseFloat(catalogVersion); } catch(NumberFormatException exception) { LOG.error("Can't extract the version number of the document catalog.", exception); } } // the most recent version is the correct one return Math.max(catalogVersionFloat, headerVersionFloat); } else { return headerVersionFloat; } } /** * Sets the PDF specification version for this document. * * @param newVersion the new PDF version (e.g. 1.4f) * */ public void setVersion(float newVersion) { float currentVersion = getVersion(); // nothing to do? if (newVersion == currentVersion) { return; } // the version can't be downgraded if (newVersion < currentVersion) { LOG.error("It's not allowed to downgrade the version of a pdf."); return; } // update the catalog version if the document version is >= 1.4 if (getDocument().getVersion() >= 1.4f) { getDocumentCatalog().setVersion(Float.toString(newVersion)); } else { // versions < 1.4f have a version header only getDocument().setVersion(newVersion); } } /** * Returns the resource cache associated with this document, or null if there is none. * * @return the resource cache or null. */ public ResourceCache getResourceCache() { return resourceCache; } /** * Sets the resource cache associated with this document. * * @param resourceCache A resource cache, or null. */ public void setResourceCache(ResourceCache resourceCache) { this.resourceCache = resourceCache; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy