All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.pdfbox.pdmodel.PDDocument Maven / Gradle / Ivy

Go to download

The Apache PDFBox library is an open source Java tool for working with PDF documents.

There is a newer version: 3.0.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel;

import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.multipdf.PDFCloneUtility;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdfwriter.COSWriter;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandlerFactory;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;

/**
 * This is the in-memory representation of the PDF document.
 * The #close() method must be called once the document is no longer needed.
 * 
 * @author Ben Litchfield
 */
public class PDDocument implements Closeable
{
    private static final Log LOG = LogFactory.getLog(PDDocument.class);

    // the low-level COS document wrapped by this object
    private final COSDocument document;

    // cached values, filled in on first access by their getters
    private PDDocumentInformation documentInformation;
    private PDDocumentCatalog documentCatalog;

    // the encryption will be cached here. When the document is decrypted then
    // the COSDocument will not have an "Encrypt" dictionary anymore and this object must be used
    private PDEncryption encryption;

    // flag which tells us whether all security should be removed from this document
    private boolean allSecurityToBeRemoved;

    // keeps track of a customized documentId for the trailer. If null, a new id will be generated.
    // This ID doesn't represent the actual documentId from the trailer
    private Long documentId;

    // the pdf to be read; null for documents created via the empty-document constructor
    private final RandomAccessRead pdfSource;

    // the access permissions of the document
    private AccessPermission accessPermission;
    
    // fonts to subset before saving
    private final Set fontsToSubset = new HashSet();
    
    // signature interface, remembered by addSignature
    private SignatureInterface signInterface;
    
    // document-wide cached resources
    private ResourceCache resourceCache = new DefaultResourceCache();
    
    /**
     * Creates an empty PDF document, using a main-memory-only memory usage setting.
     * You need to add at least one page for the document to be valid.
     */
    public PDDocument()
    {
        // delegate to the MemoryUsageSetting constructor with the main-memory-only setting
        this(MemoryUsageSetting.setupMainMemoryOnly());
    }

    /**
     * Creates an empty PDF document.
     * You need to add at least one page for the document to be valid.
     * <p>
     * If the scratch file cannot be initialized with the requested settings, a warning is logged
     * and a main-memory-only scratch file is tried instead. Should that fallback fail as well,
     * the failure is logged and construction continues without a scratch file.
     *
     * @param memUsageSetting defines how memory is used for buffering PDF streams
     */
    public PDDocument(MemoryUsageSetting memUsageSetting)
    {
        ScratchFile scratchFile = null;
        try
        {
            scratchFile = new ScratchFile(memUsageSetting);
        }
        catch (IOException ioe)
        {
            LOG.warn("Error initializing scratch file: " + ioe.getMessage() +
                     ". Fall back to main memory usage only.");
            try
            {
                scratchFile = new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly());
            }
            catch (IOException fallbackFailure)
            {
                // Best effort: keep constructing the document as before, but leave a trace
                // instead of silently swallowing the failure.
                LOG.error("Error initializing main memory scratch file: " +
                          fallbackFailure.getMessage(), fallbackFailure);
            }
        }

        document = new COSDocument(scratchFile);
        pdfSource = null;

        // First we need a trailer
        COSDictionary trailer = new COSDictionary();
        document.setTrailer(trailer);

        // Next we need the root dictionary.
        COSDictionary rootDictionary = new COSDictionary();
        trailer.setItem(COSName.ROOT, rootDictionary);
        rootDictionary.setItem(COSName.TYPE, COSName.CATALOG);
        rootDictionary.setItem(COSName.VERSION, COSName.getPDFName("1.4"));

        // next we need the pages tree structure: an empty /Kids array and a /Count of zero
        COSDictionary pages = new COSDictionary();
        rootDictionary.setItem(COSName.PAGES, pages);
        pages.setItem(COSName.TYPE, COSName.PAGES);
        COSArray kidsArray = new COSArray();
        pages.setItem(COSName.KIDS, kidsArray);
        pages.setItem(COSName.COUNT, COSInteger.ZERO);
    }

    /**
     * This will add a page to the document. This is a convenience method, that will add the page to the root of the
     * hierarchy and set the parent of the page to the root.
     * 
     * @param page The page to add to the document.
     */
    public void addPage(PDPage page)
    {
        // delegate to the page tree returned by getPages()
        getPages().add(page);
    }

    /**
     * Adds a signature to the document, using a freshly created set of default signature options.
     *
     * @param sigObject the signature dictionary to add
     * @param signatureInterface is an interface which provides signing capabilities
     * @throws IOException if there is an error creating required fields
     */
    public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface) throws IOException
    {
        SignatureOptions defaultOptions = new SignatureOptions();
        addSignature(sigObject, signatureInterface, defaultOptions);
    }

    /**
     * This will add a signature to the document. If the 0-based page number in the options
     * parameter is smaller than 0 or larger than max, the nearest valid page number will be used
     * (i.e. 0 or max) and no exception will be thrown.
     * <p>
     * The method reserves placeholder contents and a placeholder byte range in the signature
     * dictionary, remembers the signature interface, makes sure the AcroForm contains a signature
     * field for the signature, and then prepares either a non-visible widget (no visual signature
     * in the options) or a visible one whose widget is also added to the page annotations.
     *
     * @param sigObject is the signature dictionary to add
     * @param signatureInterface is an interface which provides signing capabilities
     * @param options signature options; dereferenced immediately, so it must not be null
     * @throws IOException if there is an error creating required fields
     * @throws IllegalStateException if the document has no pages
     * @throws IllegalArgumentException if a visual signature template is given but lacks the
     * required annotation or signature field objects
     */
    public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface,
                             SignatureOptions options) throws IOException
    {
        // Reserve content
        // We need to reserve some space for the signature. Some signatures including
        // big certificate chain and we need enough space to store it.
        int preferredSignatureSize = options.getPreferredSignatureSize();
        if (preferredSignatureSize > 0)
        {
            sigObject.setContents(new byte[preferredSignatureSize]);
        }
        else
        {
            // no usable preferred size given: fall back to the default size
            sigObject.setContents(new byte[SignatureOptions.DEFAULT_SIGNATURE_SIZE]);
        }

        // Reserve ByteRange (placeholder values)
        sigObject.setByteRange(new int[] { 0, 1000000000, 1000000000, 1000000000 });

        signInterface = signatureInterface;

        // Create SignatureForm for signature and append it to the document

        // Get the first valid page
        int pageCount = getNumberOfPages();
        if (pageCount == 0)
        {
            throw new IllegalStateException("Cannot sign an empty document");
        }

        // clamp the requested page index into [0, pageCount - 1]
        int startIndex = Math.min(Math.max(options.getPage(), 0), pageCount - 1);
        PDPage page = getPage(startIndex);

        // Get the AcroForm from the Root-Dictionary and append the annotation
        PDDocumentCatalog catalog = getDocumentCatalog();
        PDAcroForm acroForm = catalog.getAcroForm();
        catalog.getCOSObject().setNeedToBeUpdated(true);

        if (acroForm == null)
        {
            acroForm = new PDAcroForm(this);
            catalog.setAcroForm(acroForm);
        }
        else
        {
            // existing AcroForm: mark it as changed
            acroForm.getCOSObject().setNeedToBeUpdated(true);
        }

        List fields = acroForm.getFields();
        if (fields == null)
        {
            fields = new ArrayList();
            acroForm.setFields(fields);
        }
        else
        {
            // NOTE(review): assumes the /Fields entry is present as a COSArray whenever
            // getFields() returns non-null - confirm against PDAcroForm
            COSArray fieldArray = (COSArray) acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS);
            fieldArray.setNeedToBeUpdated(true);
        }
        // reuse an existing signature field that already holds this signature, if any
        PDSignatureField signatureField = findSignatureField(fields, sigObject);
        if (signatureField == null)
        {
            signatureField = new PDSignatureField(acroForm);
            // append the signature object
            signatureField.setValue(sigObject);
            // backward linking
            signatureField.getWidgets().get(0).setPage(page);
        }
        // to conform PDF/A-1 requirement:
        // The /F key's Print flag bit shall be set to 1 and 
        // its Hidden, Invisible and NoView flag bits shall be set to 0
        signatureField.getWidgets().get(0).setPrinted(true);

        // Set the AcroForm Fields
        List acroFormFields = acroForm.getFields();
        acroForm.getCOSObject().setDirect(true);
        acroForm.setSignaturesExist(true);
        acroForm.setAppendOnly(true);

        // true if the field was already in the AcroForm field list; otherwise it has just been added
        boolean checkFields = checkSignatureField(acroFormFields, signatureField);

        // Get the object from the visual signature
        COSDocument visualSignature = options.getVisualSignature();

        // Distinction of case for visual and non-visual signature
        if (visualSignature == null)
        {
            // non-visible signature: the page annotations are not touched
            prepareNonVisibleSignature(signatureField);
            return;
        }
        
        prepareVisibleSignature(signatureField, acroForm, visualSignature);

        // Create Annotation / Field for signature
        List annotations = page.getAnnotations();

        // Make /Annots a direct object to avoid problem if it is an existing indirect object: 
        // it would not be updated in incremental save, and if we'd set the /Annots array "to be updated" 
        // while keeping it indirect, Adobe Reader would claim that the document had been modified.
        page.setAnnotations(annotations);

        // Get the annotations of the page and append the signature-annotation to it
        // take care that page and acroforms do not share the same array (if so, we don't need to add it twice)
        if (!(annotations instanceof COSArrayList &&
              acroFormFields instanceof COSArrayList &&
              ((COSArrayList) annotations).toList().equals(((COSArrayList) acroFormFields).toList()) &&
              checkFields))
        {
            annotations.add(signatureField.getWidgets().get(0));
        }
        page.getCOSObject().setNeedToBeUpdated(true);
    }

    /**
     * Searches the given AcroForm field list for a signature field whose signature dictionary
     * equals the dictionary of the given signature. Only the entries of the list itself are
     * examined. If several fields match, the last matching one is returned.
     *
     * @param fields the AcroForm fields to search
     * @param sigObject the signature whose COS object is compared against the fields' signatures
     * @return the matching signature field, or null if no field holds this signature
     */
    private PDSignatureField findSignatureField(List<PDField> fields, PDSignature sigObject)
    {
        PDSignatureField signatureField = null;
        for (PDField pdField : fields)
        {
            if (pdField instanceof PDSignatureField)
            {
                PDSignature signature = ((PDSignatureField) pdField).getSignature();
                if (signature != null && signature.getCOSObject().equals(sigObject.getCOSObject()))
                {
                    // no break: a later match overrides an earlier one
                    signatureField = (PDSignatureField) pdField;
                }
            }
        }
        return signatureField;
    }

    /**
     * Makes sure the given signature field is part of the AcroForm field list. If an equal
     * signature field is already in the list, the field is marked for update; otherwise it is
     * appended to the list.
     *
     * @param acroFormFields the AcroForm field list to check and, if necessary, extend
     * @param signatureField the signature field to look for
     * @return true if the field already existed in the field list, false if it had to be added
     */
    private boolean checkSignatureField(List<PDField> acroFormFields, PDSignatureField signatureField)
    {
        boolean checkFields = false;
        for (PDField field : acroFormFields)
        {
            if (field instanceof PDSignatureField
                    && field.getCOSObject().equals(signatureField.getCOSObject()))
            {
                checkFields = true;
                signatureField.getCOSObject().setNeedToBeUpdated(true);
                break;
            }
            // fixme: this code does not check non-terminal fields, there could be a descendant signature
        }
        if (!checkFields)
        {
            acroFormFields.add(signatureField);
        }
        return checkFields;
    }

    /**
     * Transfers the visual parts of a signature template to the signature field: the rectangle
     * of the first annotation dictionary found, and the appearance (plus AcroForm default
     * resources, if any) of the first signature field dictionary that has an appearance
     * dictionary.
     *
     * @param signatureField the signature field receiving rectangle and appearance
     * @param acroForm the AcroForm that may receive the template's default resources
     * @param visualSignature the template document to scan
     * @throws IllegalArgumentException if the template lacks either of the two required objects
     */
    private void prepareVisibleSignature(PDSignatureField signatureField, PDAcroForm acroForm,
            COSDocument visualSignature)
    {
        boolean annotAssigned = false;
        boolean sigFieldAssigned = false;
        for (COSObject templateObject : visualSignature.getObjects())
        {
            // stop scanning as soon as both parts have been taken over
            if (annotAssigned && sigFieldAssigned)
            {
                break;
            }

            COSBase resolved = templateObject.getObject();
            if (!(resolved instanceof COSDictionary))
            {
                continue;
            }
            COSDictionary candidate = (COSDictionary) resolved;

            // the first annotation dictionary provides the widget rectangle
            COSBase typeEntry = candidate.getDictionaryObject(COSName.TYPE);
            if (!annotAssigned && COSName.ANNOT.equals(typeEntry))
            {
                assignSignatureRectangle(signatureField, candidate);
                annotAssigned = true;
            }

            // the first signature field with an appearance dictionary provides the appearance
            COSBase fieldTypeEntry = candidate.getDictionaryObject(COSName.FT);
            COSBase appearanceEntry = candidate.getDictionaryObject(COSName.AP);
            boolean usableSignatureField =
                    COSName.SIG.equals(fieldTypeEntry) && appearanceEntry instanceof COSDictionary;
            if (!sigFieldAssigned && usableSignatureField)
            {
                assignAppearanceDictionary(signatureField, (COSDictionary) appearanceEntry);
                assignAcroFormDefaultResource(acroForm, candidate);
                sigFieldAssigned = true;
            }
        }

        if (!(annotAssigned && sigFieldAssigned))
        {
            throw new IllegalArgumentException("Template is missing required objects");
        }
    }

    /**
     * Copies the /Rect entry of the given annotation dictionary to the first widget of the
     * signature field.
     *
     * @param signatureField the signature field whose first widget gets the rectangle
     * @param annotDict the annotation dictionary of the visual signature template
     */
    private void assignSignatureRectangle(PDSignatureField signatureField, COSDictionary annotDict)
    {
        PDRectangle signatureBounds =
                new PDRectangle((COSArray) annotDict.getDictionaryObject(COSName.RECT));
        signatureField.getWidgets().get(0).setRectangle(signatureBounds);
    }

    /**
     * Marks the given appearance dictionary as a direct object and sets it as the appearance
     * of the first widget of the signature field.
     *
     * @param signatureField the signature field whose first widget gets the appearance
     * @param apDict the appearance dictionary taken from the visual signature template
     */
    private void assignAppearanceDictionary(PDSignatureField signatureField, COSDictionary apDict)
    {
        apDict.setDirect(true);
        signatureField.getWidgets().get(0).setAppearance(new PDAppearanceDictionary(apDict));
    }

    /**
     * Copies the default resource dictionary /DR of the given dictionary to the AcroForm,
     * as a direct object marked for update. Nothing happens if /DR is not a dictionary.
     *
     * @param acroForm the AcroForm receiving the default resources
     * @param dict the dictionary that may carry a /DR entry
     */
    private void assignAcroFormDefaultResource(PDAcroForm acroForm, COSDictionary dict)
    {
        COSBase defaultResources = dict.getDictionaryObject(COSName.DR);
        if (!(defaultResources instanceof COSDictionary))
        {
            // no usable /DR entry available
            return;
        }
        COSDictionary resourceDict = (COSDictionary) defaultResources;
        resourceDict.setDirect(true);
        resourceDict.setNeedToBeUpdated(true);
        acroForm.getCOSObject().setItem(COSName.DR, resourceDict);
    }

    /**
     * Gives the first widget of the signature field a default-constructed rectangle, because
     * "signature fields that are not intended to be visible shall have an annotation rectangle
     * that has zero height and width", i.e. the rectangle array [ 0 0 0 0 ].
     *
     * @param signatureField the signature field that is to stay invisible
     * @throws IOException declared for callers; not thrown by the code visible here
     */
    private void prepareNonVisibleSignature(PDSignatureField signatureField)
            throws IOException
    {
        PDRectangle emptyRectangle = new PDRectangle();
        signatureField.getWidgets().get(0).setRectangle(emptyRectangle);
    }

    /**
     * This will add signature fields to the document. Every field is marked for update and
     * added to the AcroForm field list unless it is already there. For each field that already
     * carries a signature, that signature is added to the document as well.
     *
     * @param sigFields are the PDSignatureFields that should be added to the document
     * @param signatureInterface is an interface which provides signing capabilities
     * @param options signature options; if null, default signature options are used
     * @throws IOException if there is an error creating required fields
     */
    public void addSignatureField(List<PDSignatureField> sigFields, SignatureInterface signatureInterface,
            SignatureOptions options) throws IOException
    {
        PDDocumentCatalog catalog = getDocumentCatalog();
        catalog.getCOSObject().setNeedToBeUpdated(true);

        PDAcroForm acroForm = catalog.getAcroForm();
        if (acroForm == null)
        {
            acroForm = new PDAcroForm(this);
            catalog.setAcroForm(acroForm);
        }
        COSDictionary acroFormDict = acroForm.getCOSObject();
        acroFormDict.setDirect(true);
        acroFormDict.setNeedToBeUpdated(true);
        if (!acroForm.isSignaturesExist())
        {
            // 1 if at least one signature field is available
            acroForm.setSignaturesExist(true);
        }

        List<PDField> acroformFields = acroForm.getFields();

        // addSignature dereferences its options right away, so fall back to default options
        // the same way the two-argument addSignature overload does
        SignatureOptions effectiveOptions = options != null ? options : new SignatureOptions();

        for (PDSignatureField sigField : sigFields)
        {
            sigField.getCOSObject().setNeedToBeUpdated(true);

            // Check if the field already exists, add it to the AcroForm fields otherwise
            checkSignatureField(acroformFields, sigField);

            // Check if we need to add a signature
            if (sigField.getSignature() != null)
            {
                addSignature(sigField.getSignature(), signatureInterface, effectiveOptions);
            }
        }
    }

    /**
     * Remove the page from the document.
     * 
     * @param page The page to remove from the document.
     */
    public void removePage(PDPage page)
    {
        // delegate to the page tree returned by getPages()
        getPages().remove(page);
    }

    /**
     * Remove the page at the given position from the document.
     * 
     * @param pageNumber 0 based index to page number.
     */
    public void removePage(int pageNumber)
    {
        // delegate to the page tree returned by getPages()
        getPages().remove(pageNumber);
    }

    /**
     * Imports a page by cloning its content into this document and appending the clone.
     * Currently the content stream is stored in a scratch file that is associated with the
     * document. Use this method when a page of another document is to be added and its contents
     * should be copied to this document's scratch file; otherwise just use {@link #addPage}.
     *
     * Unlike {@link #addPage}, this method does a deep clone. This will be slower and have a larger
     * memory footprint. However the deep clone is important to avoid resources getting lost if the
     * source document is closed when the destination document is saved.
     *
     * If your page has annotations, and if these link to pages not in the target document, then the
     * target document might become huge. To avoid this, delete the page references of such
     * annotations before importing.
     *
     * @param page The page to import.
     * @return The page that was imported, i.e. the clone that now belongs to this document.
     *
     * @throws IOException If there is an error copying the page.
     */
    public PDPage importPage(PDPage page) throws IOException
    {
        // deep-clone the page dictionary for this document before wrapping it
        COSDictionary clonedPageDict =
                (COSDictionary) new PDFCloneUtility(this).cloneForNewDocument(page.getCOSObject());
        PDPage importedPage = new PDPage(clonedPageDict, resourceCache);
        addPage(importedPage);
        return importedPage;
    }

    /**
     * Constructor that uses an existing document. The COSDocument that is passed in must be valid.
     * No source and no access permissions are associated with the new object.
     * 
     * @param doc The COSDocument that this document wraps.
     */
    public PDDocument(COSDocument doc)
    {
        // delegate with no source
        this(doc, null);
    }

    /**
     * Constructor that uses an existing document. The COSDocument that is passed in must be valid.
     * No access permissions are associated with the new object.
     * 
     * @param doc The COSDocument that this document wraps.
     * @param source the random access source from which the pdf is read
     */
    public PDDocument(COSDocument doc, RandomAccessRead source)
    {
        // delegate with no access permissions
        this(doc, source, null);
    }

    /**
     * Constructor that wraps an existing document together with its source and its access
     * permissions. The COSDocument that is passed in must be valid.
     *
     * @param doc The COSDocument that this document wraps.
     * @param source the random access source from which the pdf is read
     * @param permission the access permissions of the pdf
     */
    public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission permission)
    {
        this.document = doc;
        this.pdfSource = source;
        this.accessPermission = permission;
    }

    /**
     * This will get the low level document, i.e. the COSDocument wrapped by this object.
     * 
     * @return The document that this layer sits on top of.
     */
    public COSDocument getDocument()
    {
        return document;
    }

    /**
     * This will get the document info dictionary. This is guaranteed to not return null.
     * The result is cached. If the trailer has no /Info entry, or the entry is not a
     * dictionary, a new empty dictionary is created and stored in the trailer.
     *
     * @return The documents /Info dictionary
     */
    public PDDocumentInformation getDocumentInformation()
    {
        if (documentInformation == null)
        {
            COSDictionary trailer = document.getTrailer();
            COSBase infoBase = trailer.getDictionaryObject(COSName.INFO);
            COSDictionary infoDic;
            if (infoBase instanceof COSDictionary)
            {
                infoDic = (COSDictionary) infoBase;
            }
            else
            {
                // missing or malformed /Info entry: start over with an empty dictionary
                // instead of failing with a ClassCastException
                infoDic = new COSDictionary();
                trailer.setItem(COSName.INFO, infoDic);
            }
            documentInformation = new PDDocumentInformation(infoDic);
        }
        return documentInformation;
    }

    /**
     * This will set the document information for this document. The given object replaces the
     * cached document information and its dictionary is stored as the trailer's /Info entry.
     * 
     * @param info The updated document information; its COS object is read, so it must not be null.
     */
    public void setDocumentInformation(PDDocumentInformation info)
    {
        documentInformation = info;
        document.getTrailer().setItem(COSName.INFO, info.getCOSObject());
    }

    /**
     * This will get the document catalog. This is guaranteed to not return null.
     * The catalog is created on first access and cached: it wraps the trailer's /Root entry
     * if that entry is a dictionary, otherwise a catalog is created without one.
     *
     * @return The documents /Root dictionary
     */
    public PDDocumentCatalog getDocumentCatalog()
    {
        if (documentCatalog != null)
        {
            return documentCatalog;
        }

        COSBase rootEntry = document.getTrailer().getDictionaryObject(COSName.ROOT);
        documentCatalog = rootEntry instanceof COSDictionary
                ? new PDDocumentCatalog(this, (COSDictionary) rootEntry)
                : new PDDocumentCatalog(this);
        return documentCatalog;
    }

    /**
     * This will tell if this document is encrypted or not, as reported by the underlying
     * COSDocument.
     * 
     * @return true If this document is encrypted.
     */
    public boolean isEncrypted()
    {
        return document.isEncrypted();
    }

    /**
     * This will get the encryption dictionary for this document. This will still return the
     * parameters if the document was decrypted, because the value is cached once it has been
     * created or set.
     *
     * @return The encryption dictionary, or null if none is cached and the document is not
     * encrypted.
     */
    public PDEncryption getEncryption()
    {
        if (encryption != null)
        {
            return encryption;
        }
        if (isEncrypted())
        {
            // first access on an encrypted document: wrap and cache its encryption dictionary
            encryption = new PDEncryption(document.getEncryptionDictionary());
        }
        return encryption;
    }

    /**
     * This will set the encryption dictionary for this document. The value is only stored in
     * this object's cache; the code here does not touch the underlying COSDocument.
     * 
     * @param encryption The encryption dictionary to use for this document.
     * 
     * @throws IOException declared for callers; the code visible here does not throw it.
     */
    public void setEncryptionDictionary(PDEncryption encryption) throws IOException
    {
        this.encryption = encryption;
    }

    /**
     * This will return the last signature, i.e. the last entry of the list returned by
     * {@link #getSignatureDictionaries()}.
     *
     * @return the last signature as PDSignature, or null if the document has no signatures.
     * @throws IOException if no document catalog can be found.
     */
    public PDSignature getLastSignatureDictionary() throws IOException
    {
        List<PDSignature> signatureDictionaries = getSignatureDictionaries();
        if (signatureDictionaries.isEmpty())
        {
            return null;
        }
        return signatureDictionaries.get(signatureDictionaries.size() - 1);
    }

    /**
     * Retrieve all signature fields from the document by walking the AcroForm field tree.
     *
     * @return a List of PDSignatureFields; empty if the document has no AcroForm or no
     * signature fields.
     * @throws IOException if no document catalog can be found.
     */
    public List<PDSignatureField> getSignatureFields() throws IOException
    {
        List<PDSignatureField> fields = new ArrayList<PDSignatureField>();
        PDAcroForm acroForm = getDocumentCatalog().getAcroForm();
        if (acroForm != null)
        {
            for (PDField field : acroForm.getFieldTree())
            {
                if (field instanceof PDSignatureField)
                {
                    fields.add((PDSignatureField) field);
                }
            }
        }
        return fields;
    }

    /**
     * Retrieve all signature dictionaries from the document. A signature is reported for every
     * signature field whose /V entry is a dictionary; fields without a value, or with a value
     * of another type, are skipped.
     *
     * @return a List of PDSignatures; empty if the document has no signed signature fields.
     * @throws IOException if no document catalog can be found.
     */
    public List<PDSignature> getSignatureDictionaries() throws IOException
    {
        List<PDSignature> signatures = new ArrayList<PDSignature>();
        for (PDSignatureField field : getSignatureFields())
        {
            COSBase value = field.getCOSObject().getDictionaryObject(COSName.V);
            // type check instead of a blind cast, so a malformed /V entry is skipped
            // rather than causing a ClassCastException
            if (value instanceof COSDictionary)
            {
                signatures.add(new PDSignature((COSDictionary) value));
            }
        }
        return signatures;
    }

    /**
     * Returns the list of fonts which will be subset before the document is saved.
     * The internal set itself is returned, not a copy.
     *
     * @return the set of fonts to subset
     */
    Set getFontsToSubset()
    {
        return fontsToSubset;
    }

    /**
     * Parses a PDF with an empty password. Unrestricted main memory will be used for buffering
     * PDF streams.
     *
     * @param file file to be loaded
     *
     * @return loaded document
     *
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file) throws IOException
    {
        MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
        return load(file, "", mainMemoryOnly);
    }

    /**
     * Parses a PDF. An empty password is used, and no key store or alias is passed on.
     * 
     * @param file file to be loaded
     * @param memUsageSetting defines how memory is used for buffering PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, MemoryUsageSetting memUsageSetting) throws IOException
    {
        return load(file, "", null, null, memUsageSetting);
    }

    /**
     * Parses a PDF with the given password and without key store or alias. Unrestricted main
     * memory will be used for buffering PDF streams.
     *
     * @param file file to be loaded
     * @param password password to be used for decryption
     *
     * @return loaded document
     *
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password) throws IOException
    {
        MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
        return load(file, password, null, null, mainMemoryOnly);
    }

    /**
     * Parses a PDF. No key store or alias is passed on.
     * 
     * @param file file to be loaded
     * @param password password to be used for decryption
     * @param memUsageSetting defines how memory is used for buffering PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password, MemoryUsageSetting memUsageSetting) throws IOException
    {
        return load(file, password, null, null, memUsageSetting);
    }

    /**
     * Parses a PDF, optionally using a key store for decryption. Unrestricted main memory will
     * be used for buffering PDF streams.
     *
     * @param file file to be loaded
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security
     * @param alias alias to be used for decryption when using public key security
     *
     * @return loaded document
     *
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password, InputStream keyStore, String alias)
    throws IOException
    {
        MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
        return load(file, password, keyStore, alias, mainMemoryOnly);
    }

    /**
     * Parses a PDF.
     * <p>
     * The file is opened for random access and a scratch file is created according to the
     * memory usage setting. If loading fails for any reason, both are closed quietly before
     * the failure is propagated; on success they stay open for use by the returned document.
     *
     * @param file file to be loaded
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security
     * @param alias alias to be used for decryption when using public key security
     * @param memUsageSetting defines how memory is used for buffering PDF streams
     *
     * @return loaded document
     *
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(File file, String password, InputStream keyStore, String alias,
                                  MemoryUsageSetting memUsageSetting) throws IOException
    {
        RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(file);
        ScratchFile scratchFile = null;
        boolean loaded = false;
        try
        {
            scratchFile = new ScratchFile(memUsageSetting);
            PDFParser parser = new PDFParser(raFile, password, keyStore, alias, scratchFile);
            parser.parse();
            PDDocument loadedDocument = parser.getPDDocument();
            loaded = true;
            return loadedDocument;
        }
        finally
        {
            // Clean up on every kind of failure, not only on IOException, so that an
            // unexpected runtime error cannot leak the open file or the scratch file.
            if (!loaded)
            {
                if (scratchFile != null)
                {
                    IOUtils.closeQuietly(scratchFile);
                }
                IOUtils.closeQuietly(raFile);
            }
        }
    }

    /**
     * Parses a PDF with an empty password and without key store or alias. The given input
     * stream is copied to the memory to enable random access to the pdf.
     * Unrestricted main memory will be used for buffering PDF streams.
     *
     * @param input stream that contains the document.
     *
     * @return loaded document
     *
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input) throws IOException
    {
        MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
        return load(input, "", null, null, mainMemoryOnly);
    }

    /**
     * Parses a PDF. Depending on the memory settings parameter the given input
     * stream is either copied to main memory or to a temporary file to enable
     * random access to the pdf. An empty password is used, and no key store or alias
     * is passed on.
     * 
     * @param input stream that contains the document.
     * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, MemoryUsageSetting memUsageSetting) throws IOException
    {
        return load(input, "", null, null, memUsageSetting);
    }

    /**
     * Loads and parses a password protected PDF from an input stream, without a key store.
     * The given input stream is copied to the memory to enable random access to the pdf, and
     * unrestricted main memory will be used for buffering PDF streams.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, String password)
            throws IOException
    {
        MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
        return load(input, password, null, null, mainMemoryOnly);
    }

    /**
     * Loads and parses a PDF from an input stream with the given decryption material.
     * The given input stream is copied to the memory to enable random access to the pdf, and
     * unrestricted main memory will be used for buffering PDF streams.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias)
            throws IOException
    {
        MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
        return load(input, password, keyStore, alias, mainMemoryOnly);
    }

    /**
     * Loads and parses a password protected PDF from an input stream, without a key store.
     * Depending on the memory settings parameter the given input stream is either copied to
     * main memory or to a temporary file to enable random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(InputStream input, String password, MemoryUsageSetting memUsageSetting)
            throws IOException
    {
        // no public key material: neither a key store nor an alias is passed on
        final InputStream noKeyStore = null;
        final String noAlias = null;
        return load(input, password, noKeyStore, noAlias, memUsageSetting);
    }
    
    /**
     * Loads and parses a PDF from an input stream. Depending on the memory settings parameter
     * the given input stream is either copied to memory or to a temporary file to enable
     * random access to the pdf.
     * 
     * @param input stream that contains the document.
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error; the scratch file created
     * by this method is closed before the exception is rethrown
     */
    public static PDDocument load(InputStream input, String password, InputStream keyStore, 
                                  String alias, MemoryUsageSetting memUsageSetting) throws IOException
    {
        final ScratchFile scratch = new ScratchFile(memUsageSetting);
        try
        {
            // buffer the stream content in the scratch file so the parser can seek in it
            PDFParser pdfParser =
                    new PDFParser(scratch.createBuffer(input), password, keyStore, alias, scratch);
            pdfParser.parse();
            return pdfParser.getPDDocument();
        }
        catch (IOException e)
        {
            IOUtils.closeQuietly(scratch);
            throw e;
        }
    }

    /**
     * Loads and parses a PDF from a byte array, using an empty password.
     * Unrestricted main memory will be used for buffering PDF streams.
     * 
     * @param input byte array that contains the document.
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(byte[] input) throws IOException
    {
        final String emptyPassword = "";
        return load(input, emptyPassword);
    }

    /**
     * Loads and parses a password protected PDF from a byte array, without a key store.
     * Unrestricted main memory will be used for buffering PDF streams.
     * 
     * @param input byte array that contains the document.
     * @param password password to be used for decryption
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(byte[] input, String password) throws IOException
    {
        // no public key material: neither a key store nor an alias is passed on
        final InputStream noKeyStore = null;
        final String noAlias = null;
        return load(input, password, noKeyStore, noAlias);
    }

    /**
     * Loads and parses a PDF from a byte array with the given decryption material.
     * Unrestricted main memory will be used for buffering PDF streams.
     * 
     * @param input byte array that contains the document.
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error
     */
    public static PDDocument load(byte[] input, String password, InputStream keyStore, 
            String alias) throws IOException
    {
        MemoryUsageSetting mainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
        return load(input, password, keyStore, alias, mainMemoryOnly);
    }

    /**
     * Loads and parses a PDF from a byte array.
     * 
     * @param input byte array that contains the document.
     * @param password password to be used for decryption
     * @param keyStore key store to be used for decryption when using public key security 
     * @param alias alias to be used for decryption when using public key security
     * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams 
     * 
     * @return loaded document
     * 
     * @throws IOException in case of a file reading or parsing error; the scratch file created
     * by this method is closed before the exception is rethrown
     */
    public static PDDocument load(byte[] input, String password, InputStream keyStore, 
            String alias, MemoryUsageSetting memUsageSetting) throws IOException
    {
        ScratchFile scratchFile = new ScratchFile(memUsageSetting);
        try
        {
            RandomAccessRead source = new RandomAccessBuffer(input);
            PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile);
            parser.parse();
            return parser.getPDDocument();
        }
        catch (IOException ioe)
        {
            // no document is returned on failure, so nobody else would ever close the
            // scratch file; release it here like the File and InputStream loaders do
            IOUtils.closeQuietly(scratchFile);
            throw ioe;
        }
    }

    /**
     * Saves the document to the file with the given name.
     * 
     * @param fileName The file to save as.
     *
     * @throws IOException if the output could not be written
     */
    public void save(String fileName) throws IOException
    {
        File target = new File(fileName);
        save(target);
    }

    /**
     * Saves the document to a file. The file is opened for writing before the document is
     * saved through {@link #save(OutputStream)}.
     * 
     * @param file The file to save as.
     *
     * @throws IOException if the output could not be written
     */
    public void save(File file) throws IOException
    {
        OutputStream output = new BufferedOutputStream(new FileOutputStream(file));
        boolean saved = false;
        try
        {
            save(output);
            saved = true;
        }
        finally
        {
            // save(OutputStream) can fail before any writer has taken over the stream (for
            // example when the document is already closed); without this the file handle
            // opened above would leak. The success path is left untouched.
            if (!saved)
            {
                IOUtils.closeQuietly(output);
            }
        }
    }

    /**
     * This will save the document to an output stream. Fonts that were designated for
     * subsetting are subset first, then the document is written through a {@code COSWriter},
     * which is closed exactly once whether or not writing succeeded.
     * 
     * @param output The stream to write to.
     *
     * @throws IOException if the document has already been closed or the output could not be
     * written
     */
    public void save(OutputStream output) throws IOException
    {
        if (document.isClosed())
        {
            throw new IOException("Cannot save a document which has been closed");
        }

        // subset designated fonts
        for (PDFont font : fontsToSubset)
        {
            font.subset();
        }
        fontsToSubset.clear();
        
        // save PDF
        COSWriter writer = new COSWriter(output);
        try
        {
            writer.write(this);
        }
        finally
        {
            // single close point: an additional close() inside the try block would close the
            // writer a second time on every successful save
            writer.close();
        }
    }

    /**
     * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a
     * file or a stream, not if the document was created in PDFBox itself.
     *
     * @param output stream to write
     * @throws IOException if the output could not be written
     * @throws IllegalStateException if the document was not loaded from a file or a stream.
     */
    public void saveIncremental(OutputStream output) throws IOException
    {
        COSWriter writer = null;
        try
        {
            // an incremental update needs the original source to write against
            if (pdfSource == null)
            {
                throw new IllegalStateException("document was not loaded from a file or a stream");
            }
            writer = new COSWriter(output, pdfSource);
            writer.write(this, signInterface);
        }
        finally
        {
            // single close point: an additional close() inside the try block would close the
            // writer a second time on every successful save. The writer is still null when the
            // precondition check or the constructor failed.
            if (writer != null)
            {
                writer.close();
            }
        }
    }

    /**
     * Looks up a single page in the page tree of the document catalog.
     *
     * @param pageIndex the page index
     * @return the page at the given index.
     */
    public PDPage getPage(int pageIndex)
    {
        // todo: replace most calls to this method with getPages()
        PDPage page = getDocumentCatalog().getPages().get(pageIndex);
        return page;
    }

    /**
     * Provides the page tree held by the document catalog.
     * 
     * @return the page tree
     */
    public PDPageTree getPages()
    {
        PDPageTree pageTree = getDocumentCatalog().getPages();
        return pageTree;
    }

    /**
     * Reports the page count of the page tree held by the document catalog.
     * 
     * @return The total number of pages in the PDF document.
     */
    public int getNumberOfPages()
    {
        int pageCount = getDocumentCatalog().getPages().getCount();
        return pageCount;
    }

    /**
     * This will close the underlying COSDocument object and, if the document was read from
     * one, the source PDF stream. Nothing happens if the COSDocument is already closed.
     * 
     * @throws IOException If there is an error releasing resources. The source PDF stream is
     * still closed when closing the COSDocument fails.
     */
    @Override
    public void close() throws IOException
    {
        if (!document.isClosed())
        {
            try
            {
                // close all intermediate I/O streams
                document.close();
            }
            finally
            {
                // close the source PDF stream, if we read from one; done in a finally block
                // so that a failure while closing the COSDocument cannot leak the source
                if (pdfSource != null)
                {
                    pdfSource.close();
                }
            }
        }
    }

    /**
     * Marks the document for encryption with the given protection policy. The content itself is
     * only encrypted when the document is saved. If {@link #setAllSecurityToBeRemoved(boolean)}
     * was set to true previously, it is reset to false and a warning is logged.
     *
     * @see org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy
     * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy
     *
     * @param policy The protection policy.
     * @throws IOException if there isn't any suitable security handler.
     */
    public void protect(ProtectionPolicy policy) throws IOException
    {
        // protecting a document and removing all security exclude each other
        if (isAllSecurityToBeRemoved())
        {
            LOG.warn("do not call setAllSecurityToBeRemoved(true) before calling protect(), "
                    + "as protect() implies setAllSecurityToBeRemoved(false)");
            setAllSecurityToBeRemoved(false);
        }

        // an unencrypted document gets a fresh encryption object to attach the handler to
        if (!isEncrypted())
        {
            encryption = new PDEncryption();
        }

        SecurityHandler handler = SecurityHandlerFactory.INSTANCE.newSecurityHandlerForPolicy(policy);
        if (handler == null)
        {
            throw new IOException("No security handler for policy " + policy);
        }
        getEncryption().setSecurityHandler(handler);
    }

    /**
     * Returns the access permissions granted when the document was decrypted. If no access
     * permission has been set, the access permission for a document owner (ie can do
     * everything) is stored and returned. The returned object is in read only mode so that
     * permissions cannot be changed. Methods providing access to content should rely on this
     * object to verify if the current user is allowed to proceed.
     * 
     * @return the access permissions for the current user on the document.
     */
    public AccessPermission getCurrentAccessPermission()
    {
        AccessPermission current = accessPermission;
        if (current == null)
        {
            // fall back to owner permissions and remember them for later calls
            current = AccessPermission.getOwnerAccessPermission();
            accessPermission = current;
        }
        return current;
    }

    /**
     * Tells whether all security is to be removed when the pdf is written.
     * 
     * @return returns true if all security shall be removed otherwise false
     */
    public boolean isAllSecurityToBeRemoved()
    {
        return this.allSecurityToBeRemoved;
    }

    /**
     * Switches the removal of all security when writing the pdf on or off.
     * 
     * @param removeAllSecurity remove all security if set to true
     */
    public void setAllSecurityToBeRemoved(boolean removeAllSecurity)
    {
        this.allSecurityToBeRemoved = removeAllSecurity;
    }

    /**
     * Returns the ID currently stored for this document.
     *
     * @return the document ID
     */
    public Long getDocumentId()
    {
        return this.documentId;
    }

    /**
     * Stores the given value as the document ID.
     * 
     * @param docId the new document ID
     */
    public void setDocumentId(Long docId)
    {
        this.documentId = docId;
    }
    
    /**
     * Returns the PDF specification version this document conforms to. For header versions
     * below 1.4 this is the header version; otherwise it is the higher of the header version
     * and the version entry of the document catalog, if that entry is present and parseable.
     *
     * @return the PDF version (e.g. 1.4f)
     */
    public float getVersion()
    {
        final float headerVersion = getDocument().getVersion();
        if (!(headerVersion >= 1.4f))
        {
            // only the header carries version information
            return headerVersion;
        }

        // starting with 1.4 the document catalog may hold a second version entry
        float catalogVersion = -1;
        final String catalogVersionText = getDocumentCatalog().getVersion();
        if (catalogVersionText != null)
        {
            try
            {
                catalogVersion = Float.parseFloat(catalogVersionText);
            }
            catch (NumberFormatException exception)
            {
                LOG.error("Can't extract the version number of the document catalog.", exception);
            }
        }
        // the most recent version is the correct one
        return Math.max(catalogVersion, headerVersion);
    }

    /**
     * Sets the PDF specification version for this document. A version equal to the current one
     * is ignored, and an attempt to downgrade is logged as an error and ignored. Otherwise the
     * catalog version is updated when the header version is at least 1.4, else the header
     * version itself.
     *
     * @param newVersion the new PDF version (e.g. 1.4f)
     * 
     */
    public void setVersion(float newVersion)
    {
        final float existingVersion = getVersion();
        if (newVersion <= existingVersion)
        {
            // equal: nothing to do; lower: the version can't be downgraded
            if (newVersion < existingVersion)
            {
                LOG.error("It's not allowed to downgrade the version of a pdf.");
            }
            return;
        }

        if (!(getDocument().getVersion() >= 1.4f))
        {
            // versions < 1.4f have a version header only
            getDocument().setVersion(newVersion);
            return;
        }
        // update the catalog version if the document version is >= 1.4
        getDocumentCatalog().setVersion(Float.toString(newVersion));
    }

    /**
     * Returns the resource cache associated with this document.
     *
     * @return the resource cache, or null if there is none
     */
    public ResourceCache getResourceCache()
    {
        return this.resourceCache;
    }

    /**
     * Associates the given resource cache with this document, replacing any previous one.
     * 
     * @param resourceCache A resource cache, or null.
     */
    public void setResourceCache(ResourceCache resourceCache)
    {
        ResourceCache replacement = resourceCache;
        this.resourceCache = replacement;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy