org.apache.pdfbox.pdfparser.COSParser (Maven / Gradle / Ivy)

The Apache PDFBox library is an open source Java tool for working with PDF documents.

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdfparser;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.security.KeyStore;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.TreeMap;
import java.util.Vector;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInputStream;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNull;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;

import static org.apache.pdfbox.util.Charsets.ISO_8859_1;

/**
 * PDF parser which first reads the startxref and xref tables in order to know which objects are valid, and then
 * parses only those objects.
 * 
 * First {@link PDFParser#parse()} or {@link FDFParser#parse()} must be called before page objects
 * can be retrieved, e.g. {@link PDFParser#getPDDocument()}.
 * 
 * This class is a much enhanced version of QuickParser presented in PDFBOX-1104 by Jeremy Villalobos.
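 * 
 * An illustrative usage sketch (an assumption, not part of this source: it relies on the PDFBox 2.x classes
 * {@code RandomAccessBufferedFileInputStream}, {@code PDFParser} and {@code PDDocument}, whose names may differ
 * between versions):
 * <pre>{@code
 * RandomAccessRead source = new RandomAccessBufferedFileInputStream(new File("example.pdf"));
 * PDFParser parser = new PDFParser(source);
 * parser.parse();                               // reads startxref/xref first, then the referenced objects
 * PDDocument document = parser.getPDDocument(); // page objects can be retrieved from here on
 * }</pre>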
 */
public class COSParser extends BaseParser
{
    private static final String PDF_HEADER = "%PDF-";
    private static final String FDF_HEADER = "%FDF-";
    
    private static final String PDF_DEFAULT_VERSION = "1.4";
    private static final String FDF_DEFAULT_VERSION = "1.0";

    private static final char[] XREF_TABLE = new char[] { 'x', 'r', 'e', 'f' };
    private static final char[] XREF_STREAM = new char[] { '/', 'X', 'R', 'e', 'f' };
    private static final char[] STARTXREF = new char[] { 's','t','a','r','t','x','r','e','f' };

    private static final byte[] ENDSTREAM = new byte[] { E, N, D, S, T, R, E, A, M };

    private static final byte[] ENDOBJ = new byte[] { E, N, D, O, B, J };

    private static final long MINIMUM_SEARCH_OFFSET = 6;
    
    private static final int X = 'x';

    private static final int STRMBUFLEN = 2048;
    private final byte[] strmBuf    = new byte[ STRMBUFLEN ];

    protected final RandomAccessRead source;

    private AccessPermission accessPermission;
    private InputStream keyStoreInputStream = null;
    private String password = "";
    private String keyAlias = null;

    /**
     * Only parse the PDF file minimally, allowing access to basic information.
     */
    public static final String SYSPROP_PARSEMINIMAL = 
            "org.apache.pdfbox.pdfparser.nonSequentialPDFParser.parseMinimal";
    
    /**
     * The range within which the %%EOF marker will be searched.
     * Useful if there are additional characters after %%EOF within the PDF.
     */
    public static final String SYSPROP_EOFLOOKUPRANGE =
            "org.apache.pdfbox.pdfparser.nonSequentialPDFParser.eofLookupRange";

    /**
     * How many trailing bytes to read for EOF marker.
     */
    private static final int DEFAULT_TRAIL_BYTECOUNT = 2048;
    /**
     * EOF-marker.
     */
    protected static final char[] EOF_MARKER = new char[] { '%', '%', 'E', 'O', 'F' };
    /**
     * obj-marker.
     */
    protected static final char[] OBJ_MARKER = new char[] { 'o', 'b', 'j' };

    /**
     * trailer-marker.
     */
    private static final char[] TRAILER_MARKER = new char[] { 't', 'r', 'a', 'i', 'l', 'e', 'r' };

    /**
     * ObjStream-marker.
     */
    private static final char[] OBJ_STREAM = new char[] { '/', 'O', 'b', 'j', 'S', 't', 'm' };

    private long trailerOffset;
    
    /**
     * file length.
     */
    protected long fileLen;

    /**
     * Indicates whether the parser uses its auto-healing capability (lenient mode).
     */
    private boolean isLenient = true;
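
    // Illustrative note (not part of the original source): leniency must be chosen before parsing starts,
    // e.g. parser.setLenient(false) to fail fast on malformed PDFs instead of attempting repairs;
    // setLenient(boolean) further below rejects changes once initialParseDone has been set.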

    protected boolean initialParseDone = false;

    private boolean trailerWasRebuild = false;
    /**
     * Contains all objects found by a brute-force search.
     */
    private Map<COSObjectKey, Long> bfSearchCOSObjectKeyOffsets = null;
    private Long lastEOFMarker = null;
    private List<Long> bfSearchXRefTablesOffsets = null;
    private List<Long> bfSearchXRefStreamsOffsets = null;
    private PDEncryption encryption = null;

    /**
     * The security handler.
     */
    protected SecurityHandler securityHandler = null;

    /**
     *  how many trailing bytes to read for EOF marker.
     */
    private int readTrailBytes = DEFAULT_TRAIL_BYTECOUNT; 

    private static final Log LOG = LogFactory.getLog(COSParser.class);

    /** 
     * Collects all Xref/trailer objects and resolves them into a single
     * object using the startxref reference.
     */
    protected XrefTrailerResolver xrefTrailerResolver = new XrefTrailerResolver();
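
    // Illustrative note (not part of the original source): the typical resolution sequence, as used
    // further below in rebuildTrailer(), looks like this:
    //     xrefTrailerResolver.nextXrefObj(offset, XRefType.TABLE);  // start a new xref section
    //     xrefTrailerResolver.setXRef(objectKey, objectOffset);     // register object offsets
    //     xrefTrailerResolver.setStartxref(startXrefOffset);        // select the chain to resolve
    //     COSDictionary trailer = xrefTrailerResolver.getTrailer(); // merged trailer dictionary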


    /**
     * The prefix for the temp file being used. 
     */
    public static final String TMP_FILE_PREFIX = "tmpPDF";
    
    /**
     * Default constructor.
     */
    public COSParser(RandomAccessRead source)
    {
        super(new RandomAccessSource(source));
        this.source = source;
    }

    /**
     * Constructor for encrypted PDFs.
     * 
     * @param source input representing the pdf.
     * @param password password to be used for decryption.
     * @param keyStore key store to be used for decryption when using public key security
     * @param keyAlias alias to be used for decryption when using public key security
     * 
     */
    public COSParser(RandomAccessRead source, String password, InputStream keyStore,
            String keyAlias)
    {
        super(new RandomAccessSource(source));
        this.source = source;
        this.password = password;
        this.keyAlias = keyAlias;
        keyStoreInputStream = keyStore;
    }
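
    // Illustrative use (not part of the original source; assumes a RandomAccessRead backed by the PDF bytes):
    //     COSParser parser = new COSParser(source, "user-password", null, null);
    // For public key security, pass the key store InputStream and the key alias instead of the nulls.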

    /**
     * Sets how many trailing bytes of the PDF file are searched for the EOF marker and the 'startxref' marker.
     * If not set, the default value {@link #DEFAULT_TRAIL_BYTECOUNT} is used.
     *
     * <p>The new value is checked to be at least 16. However, for practical use cases this value should not be
     * lower than 1000; even 2000 was found not to be enough in some cases where trailing garbage such as HTML
     * snippets followed the EOF marker.</p>
     *
     * <p>If the system property {@link #SYSPROP_EOFLOOKUPRANGE} is defined, this value is set on initialization
     * but can be overwritten later.</p>
     *
     * @param byteCount number of trailing bytes
     */
    public void setEOFLookupRange(int byteCount)
    {
        if (byteCount > 15)
        {
            readTrailBytes = byteCount;
        }
    }

    /**
     * Read the trailer information and provide a COSDictionary containing the trailer information.
     *
     * @return a COSDictionary containing the trailer information
     * @throws IOException if something went wrong
     */
    protected COSDictionary retrieveTrailer() throws IOException
    {
        COSDictionary trailer = null;
        boolean rebuildTrailer = false;
        try
        {
            // parse startxref
            // TODO FDF files don't have a startxref value, so that rebuildTrailer is triggered
            long startXRefOffset = getStartxrefOffset();
            if (startXRefOffset > -1)
            {
                trailer = parseXref(startXRefOffset);
            }
            else
            {
                rebuildTrailer = isLenient();
            }
        }
        catch (IOException exception)
        {
            if (isLenient())
            {
                rebuildTrailer = true;
            }
            else
            {
                throw exception;
            }
        }
        // check if the trailer contains a Root object
        if (trailer != null && trailer.getItem(COSName.ROOT) == null)
        {
            rebuildTrailer = isLenient();
        }
        if (rebuildTrailer)
        {
            trailer = rebuildTrailer();
        }
        else
        {
            // prepare decryption if necessary
            prepareDecryption();
            if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty())
            {
                bfSearchForObjStreams();
            }
        }
        return trailer;
    }

    /**
     * Parses cross reference tables.
     *
     * @param startXRefOffset start offset of the first table
     * @return the trailer dictionary
     * @throws IOException if something went wrong
     */
    protected COSDictionary parseXref(long startXRefOffset) throws IOException
    {
        source.seek(startXRefOffset);
        long startXrefOffset = Math.max(0, parseStartXref());
        // check the startxref offset
        long fixedOffset = checkXRefOffset(startXrefOffset);
        if (fixedOffset > -1)
        {
            startXrefOffset = fixedOffset;
        }
        document.setStartXref(startXrefOffset);
        long prev = startXrefOffset;
        // ---- parse whole chain of xref tables/object streams using PREV reference
        Set<Long> prevSet = new HashSet<>();
        while (prev > 0)
        {
            // seek to xref table
            source.seek(prev);
            // skip white spaces
            skipSpaces();
            // -- parse xref
            if (source.peek() == X)
            {
                // xref table and trailer
                // use existing parser to parse xref table
                parseXrefTable(prev);
                if (!parseTrailer())
                {
                    throw new IOException("Expected trailer object at position: " + source.getPosition());
                }
                COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer();
                // check for a XRef stream, it may contain some object ids of compressed objects
                if (trailer.containsKey(COSName.XREF_STM))
                {
                    int streamOffset = trailer.getInt(COSName.XREF_STM);
                    // check the xref stream reference
                    fixedOffset = checkXRefOffset(streamOffset);
                    if (fixedOffset > -1 && fixedOffset != streamOffset)
                    {
                        LOG.warn("/XRefStm offset " + streamOffset + " is incorrect, corrected to " + fixedOffset);
                        streamOffset = (int) fixedOffset;
                        trailer.setInt(COSName.XREF_STM, streamOffset);
                    }
                    if (streamOffset > 0)
                    {
                        source.seek(streamOffset);
                        skipSpaces();
                        try
                        {
                            parseXrefObjStream(prev, false);
                        }
                        catch (IOException ex)
                        {
                            if (isLenient)
                            {
                                LOG.error("Failed to parse /XRefStm at offset " + streamOffset, ex);
                            }
                            else
                            {
                                throw ex;
                            }
                        }
                    }
                    else
                    {
                        if (isLenient)
                        {
                            LOG.error("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                        }
                        else
                        {
                            throw new IOException("Skipped XRef stream due to a corrupt offset:" + streamOffset);
                        }
                    }
                }
                prev = trailer.getLong(COSName.PREV);
                if (prev > 0)
                {
                    // check the xref table reference
                    fixedOffset = checkXRefOffset(prev);
                    if (fixedOffset > -1 && fixedOffset != prev)
                    {
                        prev = fixedOffset;
                        trailer.setLong(COSName.PREV, prev);
                    }
                }
            }
            else
            {
                // parse xref stream
                prev = parseXrefObjStream(prev, true);
                if (prev > 0)
                {
                    // check
the xref table reference fixedOffset = checkXRefOffset(prev); if (fixedOffset > -1 && fixedOffset != prev) { prev = fixedOffset; COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer(); trailer.setLong(COSName.PREV, prev); } } } if (prevSet.contains(prev)) { throw new IOException("/Prev loop at offset " + prev); } prevSet.add(prev); } // ---- build valid xrefs out of the xref chain xrefTrailerResolver.setStartxref(startXrefOffset); COSDictionary trailer = xrefTrailerResolver.getTrailer(); document.setTrailer(trailer); document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType()); // check the offsets of all referenced objects checkXrefOffsets(); // copy xref table document.addXRefTable(xrefTrailerResolver.getXrefTable()); return trailer; } /** * Parses an xref object stream starting with indirect object id. * * @return value of PREV item in dictionary or -1 if no such item exists */ private long parseXrefObjStream(long objByteOffset, boolean isStandalone) throws IOException { // ---- parse indirect object head long objectNumber = readObjectNumber(); // remember the highest XRef object number to avoid it being reused in incremental saving long currentHighestXRefObjectNumber = document.getHighestXRefObjectNumber(); document.setHighestXRefObjectNumber(Math.max(currentHighestXRefObjectNumber, objectNumber)); readGenerationNumber(); readExpectedString(OBJ_MARKER, true); COSDictionary dict = parseCOSDictionary(); COSStream xrefStream = parseCOSStream(dict); parseXrefStream(xrefStream, objByteOffset, isStandalone); xrefStream.close(); return dict.getLong(COSName.PREV); } /** * Looks for and parses startxref. We first look for last '%%EOF' marker (within last * {@link #DEFAULT_TRAIL_BYTECOUNT} bytes (or range set via {@link #setEOFLookupRange(int)}) and go back to find * startxref. * * @return the offset of StartXref * @throws IOException If something went wrong. */ protected final long getStartxrefOffset() throws IOException { byte[] buf; long skipBytes; // read trailing bytes into buffer try { final int trailByteCount = (fileLen < readTrailBytes) ? (int) fileLen : readTrailBytes; buf = new byte[trailByteCount]; skipBytes = fileLen - trailByteCount; source.seek(skipBytes); int off = 0; int readBytes; while (off < trailByteCount) { readBytes = source.read(buf, off, trailByteCount - off); // in order to not get stuck in a loop we check readBytes (this should never happen) if (readBytes < 1) { throw new IOException( "No more bytes to read for trailing buffer, but expected: " + (trailByteCount - off)); } off += readBytes; } } finally { source.seek(0); } // find last '%%EOF' int bufOff = lastIndexOf(EOF_MARKER, buf, buf.length); if (bufOff < 0) { if (isLenient) { // in lenient mode the '%%EOF' isn't needed bufOff = buf.length; LOG.debug("Missing end of file marker '" + new String(EOF_MARKER) + "'"); } else { throw new IOException("Missing end of file marker '" + new String(EOF_MARKER) + "'"); } } // find last startxref preceding EOF marker bufOff = lastIndexOf(STARTXREF, buf, bufOff); if (bufOff < 0) { throw new IOException("Missing 'startxref' marker."); } else { return skipBytes + bufOff; } } /** * Searches last appearance of pattern within buffer. Lookup before _lastOff and goes back until 0. 
* * @param pattern pattern to search for * @param buf buffer to search pattern in * @param endOff offset (exclusive) where lookup starts at * * @return start offset of pattern within buffer or -1 if pattern could not be found */ protected int lastIndexOf(final char[] pattern, final byte[] buf, final int endOff) { final int lastPatternChOff = pattern.length - 1; int bufOff = endOff; int patOff = lastPatternChOff; char lookupCh = pattern[patOff]; while (--bufOff >= 0) { if (buf[bufOff] == lookupCh) { if (--patOff < 0) { // whole pattern matched return bufOff; } // matched current char, advance to preceding one lookupCh = pattern[patOff]; } else if (patOff < lastPatternChOff) { // no char match but already matched some chars; reset patOff = lastPatternChOff; lookupCh = pattern[patOff]; } } return -1; } /** * Return true if parser is lenient. Meaning auto healing capacity of the parser are used. * * @return true if parser is lenient */ public boolean isLenient() { return isLenient; } /** * Change the parser leniency flag. * * This method can only be called before the parsing of the file. * * @param lenient try to handle malformed PDFs. * */ public void setLenient(boolean lenient) { if (initialParseDone) { throw new IllegalArgumentException("Cannot change leniency after parsing"); } this.isLenient = lenient; } /** * Creates a unique object id using object number and object generation * number. (requires object number < 2^31)) */ private long getObjectId(final COSObject obj) { return obj.getObjectNumber() << 32 | obj.getGenerationNumber(); } /** * Adds all from newObjects to toBeParsedList if it is not an COSObject or * we didn't add this COSObject already (checked via addedObjects). */ private void addNewToList(final Queue toBeParsedList, final Collection newObjects, final Set addedObjects) { for (COSBase newObject : newObjects) { addNewToList(toBeParsedList, newObject, addedObjects); } } /** * Adds newObject to toBeParsedList if it is not an COSObject or we didn't * add this COSObject already (checked via addedObjects). Simple objects are * not added because nothing is done with them when toBeParsedList is * processed. */ private void addNewToList(final Queue toBeParsedList, final COSBase newObject, final Set addedObjects) { if (newObject instanceof COSObject) { final long objId = getObjectId((COSObject) newObject); if (!addedObjects.add(objId)) { return; } toBeParsedList.add(newObject); } else if (newObject instanceof COSDictionary || newObject instanceof COSArray) { toBeParsedList.add(newObject); } } /** * Will parse every object necessary to load a single page from the pdf document. We try our * best to order objects according to offset in file before reading to minimize seek operations. * * @param dict the COSObject from the parent pages. * @param excludeObjects dictionary object reference entries with these names will not be parsed * * @throws IOException if something went wrong */ protected void parseDictObjects(COSDictionary dict, COSName... 
excludeObjects) throws IOException { // ---- create queue for objects waiting for further parsing final Queue toBeParsedList = new LinkedList(); // offset ordered object map final TreeMap> objToBeParsed = new TreeMap>(); // in case of compressed objects offset points to stmObj final Set parsedObjects = new HashSet(); final Set addedObjects = new HashSet(); addExcludedToList(excludeObjects, dict, parsedObjects); addNewToList(toBeParsedList, dict.getValues(), addedObjects); // ---- go through objects to be parsed while (!(toBeParsedList.isEmpty() && objToBeParsed.isEmpty())) { // -- first get all COSObject from other kind of objects and // put them in objToBeParsed; afterwards toBeParsedList is empty COSBase baseObj; while ((baseObj = toBeParsedList.poll()) != null) { if (baseObj instanceof COSDictionary) { addNewToList(toBeParsedList, ((COSDictionary) baseObj).getValues(), addedObjects); } else if (baseObj instanceof COSArray) { for (COSBase cosBase : ((COSArray) baseObj)) { addNewToList(toBeParsedList, cosBase, addedObjects); } } else if (baseObj instanceof COSObject) { COSObject obj = (COSObject) baseObj; long objId = getObjectId(obj); COSObjectKey objKey = new COSObjectKey(obj.getObjectNumber(), obj.getGenerationNumber()); if (!parsedObjects.contains(objId)) { Long fileOffset = document.getXrefTable().get(objKey); if (fileOffset == null && isLenient && bfSearchCOSObjectKeyOffsets != null) { fileOffset = bfSearchCOSObjectKeyOffsets.get(objKey); if (fileOffset != null) { LOG.debug("Set missing " + fileOffset + " for object " + objKey); document.getXrefTable().put(objKey, fileOffset); } } // it is allowed that object references point to null, // thus we have to test if (fileOffset != null && fileOffset != 0) { if (fileOffset > 0) { objToBeParsed.put(fileOffset, Collections.singletonList(obj)); } else { // negative offset means we have a compressed // object within object stream; // get offset of object stream COSObjectKey key = new COSObjectKey((int) -fileOffset, 0); fileOffset = document.getXrefTable().get(key); if (fileOffset == null || fileOffset <= 0) { if (isLenient && bfSearchCOSObjectKeyOffsets != null) { fileOffset = bfSearchCOSObjectKeyOffsets.get(key); if (fileOffset != null) { LOG.debug("Set missing " + fileOffset + " for object " + key); document.getXrefTable().put(key, fileOffset); } } else { throw new IOException( "Invalid object stream xref object reference for key '" + objKey + "': " + fileOffset); } } List stmObjects = objToBeParsed.get(fileOffset); if (stmObjects == null) { stmObjects = new ArrayList(); objToBeParsed.put(fileOffset, stmObjects); } // java does not have a test for immutable else if (!(stmObjects instanceof ArrayList)) { throw new IOException(obj + " cannot be assigned to offset " + fileOffset + ", this belongs to " + stmObjects.get(0)); } stmObjects.add(obj); } } else { // NULL object COSObject pdfObject = document.getObjectFromPool(objKey); pdfObject.setObject(COSNull.NULL); } } } } // ---- read first COSObject with smallest offset // resulting object will be added to toBeParsedList if (objToBeParsed.isEmpty()) { break; } for (COSObject obj : objToBeParsed.remove(objToBeParsed.firstKey())) { COSBase parsedObj = parseObjectDynamically(obj, false); if (parsedObj != null) { obj.setObject(parsedObj); addNewToList(toBeParsedList, parsedObj, addedObjects); parsedObjects.add(getObjectId(obj)); } } } } // add objects not to be parsed to list of already parsed objects private void addExcludedToList(COSName[] excludeObjects, COSDictionary dict, final Set 
parsedObjects) { if (excludeObjects != null) { for (COSName objName : excludeObjects) { COSBase baseObj = dict.getItem(objName); if (baseObj instanceof COSObject) { parsedObjects.add(getObjectId((COSObject) baseObj)); } } } } /** * This will parse the next object from the stream and add it to the local state. * * @param obj object to be parsed (we only take object number and generation number for lookup start offset) * @param requireExistingNotCompressedObj if true object to be parsed must not be contained within * compressed stream * @return the parsed object (which is also added to document object) * * @throws IOException If an IO error occurs. */ protected final COSBase parseObjectDynamically(COSObject obj, boolean requireExistingNotCompressedObj) throws IOException { return parseObjectDynamically(obj.getObjectNumber(), obj.getGenerationNumber(), requireExistingNotCompressedObj); } /** * This will parse the next object from the stream and add it to the local state. * It's reduced to parsing an indirect object. * * @param objNr object number of object to be parsed * @param objGenNr object generation number of object to be parsed * @param requireExistingNotCompressedObj if true the object to be parsed must be defined in xref * (comment: null objects may be missing from xref) and it must not be a compressed object within object stream * (this is used to circumvent being stuck in a loop in a malicious PDF) * * @return the parsed object (which is also added to document object) * * @throws IOException If an IO error occurs. */ protected COSBase parseObjectDynamically(long objNr, int objGenNr, boolean requireExistingNotCompressedObj) throws IOException { // ---- create object key and get object (container) from pool final COSObjectKey objKey = new COSObjectKey(objNr, objGenNr); final COSObject pdfObject = document.getObjectFromPool(objKey); if (pdfObject.getObject() == null) { // not previously parsed // ---- read offset or object stream object number from xref table Long offsetOrObjstmObNr = document.getXrefTable().get(objKey); // maybe something is wrong with the xref table -> perform brute force search for all objects if (offsetOrObjstmObNr == null && isLenient && bfSearchCOSObjectKeyOffsets != null) { offsetOrObjstmObNr = bfSearchCOSObjectKeyOffsets.get(objKey); if (offsetOrObjstmObNr != null) { LOG.debug("Set missing offset " + offsetOrObjstmObNr + " for object " + objKey); document.getXrefTable().put(objKey, offsetOrObjstmObNr); } } // sanity test to circumvent loops with broken documents if (requireExistingNotCompressedObj && ((offsetOrObjstmObNr == null) || (offsetOrObjstmObNr <= 0))) { throw new IOException("Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration()); } // maybe something is wrong with the xref table -> perform brute force search for all objects if (offsetOrObjstmObNr == null && isLenient && bfSearchCOSObjectKeyOffsets == null) { bfSearchForObjects(); if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty()) { LOG.debug("Add all new read objects from brute force search to the xref table"); Map xrefOffset = document.getXrefTable(); final Set> entries = bfSearchCOSObjectKeyOffsets.entrySet(); for (Entry entry : entries) { COSObjectKey key = entry.getKey(); // add all missing objects to the xref table if (!xrefOffset.containsKey(key)) { xrefOffset.put(key, entry.getValue()); } } offsetOrObjstmObNr = xrefOffset.get(objKey); } } if (offsetOrObjstmObNr == null) { // not defined object -> NULL 
object (Spec. 1.7, chap. 3.2.9) pdfObject.setObject(COSNull.NULL); } else if (offsetOrObjstmObNr > 0) { // offset of indirect object in file parseFileObject(offsetOrObjstmObNr, objKey, pdfObject); } else { // xref value is object nr of object stream containing object to be parsed // since our object was not found it means object stream was not parsed so far parseObjectStream((int) -offsetOrObjstmObNr); } } return pdfObject.getObject(); } private void parseFileObject(Long offsetOrObjstmObNr, final COSObjectKey objKey, final COSObject pdfObject) throws IOException { // ---- go to object start source.seek(offsetOrObjstmObNr); // ---- we must have an indirect object final long readObjNr = readObjectNumber(); final int readObjGen = readGenerationNumber(); readExpectedString(OBJ_MARKER, true); // ---- consistency check if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration())) { throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() + " points to wrong object: " + readObjNr + ":" + readObjGen + " at offset " + offsetOrObjstmObNr); } skipSpaces(); COSBase pb = parseDirObject(); String endObjectKey = readString(); if (endObjectKey.equals(STREAM_STRING)) { source.rewind(endObjectKey.getBytes(ISO_8859_1).length); if (pb instanceof COSDictionary) { COSStream stream = parseCOSStream((COSDictionary) pb); if (securityHandler != null) { securityHandler.decryptStream(stream, objKey.getNumber(), objKey.getGeneration()); } pb = stream; } else { // this is not legal // the combination of a dict and the stream/endstream // forms a complete stream object throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ")."); } skipSpaces(); endObjectKey = readLine(); // we have case with a second 'endstream' before endobj if (!endObjectKey.startsWith(ENDOBJ_STRING) && endObjectKey.startsWith(ENDSTREAM_STRING)) { endObjectKey = endObjectKey.substring(9).trim(); if (endObjectKey.length() == 0) { // no other characters in extra endstream line // read next line endObjectKey = readLine(); } } } else if (securityHandler != null) { securityHandler.decrypt(pb, objKey.getNumber(), objKey.getGeneration()); } pdfObject.setObject(pb); if (!endObjectKey.startsWith(ENDOBJ_STRING)) { if (isLenient) { LOG.warn("Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj' but with '" + endObjectKey + "'"); } else { throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj' but with '" + endObjectKey + "'"); } } } private void parseObjectStream(int objstmObjNr) throws IOException { final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true); if (objstmBaseObj instanceof COSStream) { // parse object stream PDFObjectStreamParser parser; try { parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document); } catch (IOException ex) { if (isLenient) { LOG.error("object stream " + objstmObjNr + " could not be parsed due to an exception", ex); return; } else { throw ex; } } try { parser.parse(); } catch(IOException exception) { if (isLenient) { LOG.debug("Stop reading object stream "+objstmObjNr+" due to an exception", exception); // the error is handled in parseDictObjects return; } else { throw exception; } } // register all objects which are referenced to be contained in object stream for (COSObject next : parser.getObjects()) { COSObjectKey stmObjKey = new COSObjectKey(next); Long offset = 
xrefTrailerResolver.getXrefTable().get(stmObjKey); if (offset != null && offset == -objstmObjNr) { COSObject stmObj = document.getObjectFromPool(stmObjKey); stmObj.setObject(next.getObject()); } } } } /** * Returns length value referred to or defined in given object. */ private COSNumber getLength(final COSBase lengthBaseObj, final COSName streamType) throws IOException { if (lengthBaseObj == null) { return null; } COSNumber retVal = null; // maybe length was given directly if (lengthBaseObj instanceof COSNumber) { retVal = (COSNumber) lengthBaseObj; } // length in referenced object else if (lengthBaseObj instanceof COSObject) { COSObject lengthObj = (COSObject) lengthBaseObj; COSBase length = lengthObj.getObject(); if (length == null) { // not read so far, keep current stream position final long curFileOffset = source.getPosition(); boolean isObjectStream = COSName.OBJ_STM.equals(streamType); parseObjectDynamically(lengthObj, isObjectStream); // reset current stream position source.seek(curFileOffset); length = lengthObj.getObject(); } if (length == null) { throw new IOException("Length object content was not read."); } if (COSNull.NULL == length) { LOG.warn("Length object (" + lengthObj.getObjectNumber() + " " + lengthObj.getGenerationNumber() + ") not found"); return null; } if (!(length instanceof COSNumber)) { throw new IOException("Wrong type of referenced length object " + lengthObj + ": " + length.getClass().getSimpleName()); } retVal = (COSNumber) length; } else { throw new IOException("Wrong type of length object: " + lengthBaseObj.getClass().getSimpleName()); } return retVal; } private static final int STREAMCOPYBUFLEN = 8192; private final byte[] streamCopyBuf = new byte[STREAMCOPYBUFLEN]; /** * This will read a COSStream from the input stream using length attribute within dictionary. If * length attribute is a indirect reference it is first resolved to get the stream length. This * means we copy stream data without testing for 'endstream' or 'endobj' and thus it is no * problem if these keywords occur within stream. We require 'endstream' to be found after * stream data is read. * * @param dic dictionary that goes with this stream. * * @return parsed pdf stream. * * @throws IOException if an error occurred reading the stream, like problems with reading * length attribute, stream does not end with 'endstream' after data read, stream too short etc. */ protected COSStream parseCOSStream(COSDictionary dic) throws IOException { COSStream stream = document.createCOSStream(dic); // read 'stream'; this was already tested in parseObjectsDynamically() readString(); skipWhiteSpaces(); /* * This needs to be dic.getItem because when we are parsing, the underlying object might still be null. 
*/ COSNumber streamLengthObj = getLength(dic.getItem(COSName.LENGTH), dic.getCOSName(COSName.TYPE)); if (streamLengthObj == null) { if (isLenient) { LOG.warn("The stream doesn't provide any stream length, using fallback readUntilEnd, at offset " + source.getPosition()); } else { throw new IOException("Missing length for stream."); } } // get output stream to copy data to if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue())) { OutputStream out = stream.createRawOutputStream(); try { readValidStream(out, streamLengthObj); } finally { out.close(); // restore original (possibly incorrect) length stream.setItem(COSName.LENGTH, streamLengthObj); } } else { OutputStream out = stream.createRawOutputStream(); try { readUntilEndStream(new EndstreamOutputStream(out)); } finally { out.close(); // restore original (possibly incorrect) length if (streamLengthObj != null) { stream.setItem(COSName.LENGTH, streamLengthObj); } } } String endStream = readString(); if (endStream.equals("endobj") && isLenient) { LOG.warn("stream ends with 'endobj' instead of 'endstream' at offset " + source.getPosition()); // avoid follow-up warning about missing endobj source.rewind(ENDOBJ.length); } else if (endStream.length() > 9 && isLenient && endStream.substring(0,9).equals(ENDSTREAM_STRING)) { LOG.warn("stream ends with '" + endStream + "' instead of 'endstream' at offset " + source.getPosition()); // unread the "extra" bytes source.rewind(endStream.substring(9).getBytes(ISO_8859_1).length); } else if (!endStream.equals(ENDSTREAM_STRING)) { throw new IOException( "Error reading stream, expected='endstream' actual='" + endStream + "' at offset " + source.getPosition()); } return stream; } /** * This method will read through the current stream object until * we find the keyword "endstream" meaning we're at the end of this * object. Some pdf files, however, forget to write some endstream tags * and just close off objects with an "endobj" tag so we have to handle * this case as well. * * This method is optimized using buffered IO and reduced number of * byte compare operations. * * @param out stream we write out to. * * @throws IOException if something went wrong */ private void readUntilEndStream( final OutputStream out ) throws IOException { int bufSize; int charMatchCount = 0; byte[] keyw = ENDSTREAM; // last character position of shortest keyword ('endobj') final int quickTestOffset = 5; // read next chunk into buffer; already matched chars are added to beginning of buffer while ( ( bufSize = source.read( strmBuf, charMatchCount, STRMBUFLEN - charMatchCount ) ) > 0 ) { bufSize += charMatchCount; int bIdx = charMatchCount; int quickTestIdx; // iterate over buffer, trying to find keyword match for ( int maxQuicktestIdx = bufSize - quickTestOffset; bIdx < bufSize; bIdx++ ) { // reduce compare operations by first test last character we would have to // match if current one matches; if it is not a character from keywords // we can move behind the test character; this shortcut is inspired by the // Boyer-Moore string search algorithm and can reduce parsing time by approx. 
20% quickTestIdx = bIdx + quickTestOffset; if (charMatchCount == 0 && quickTestIdx < maxQuicktestIdx) { final byte ch = strmBuf[quickTestIdx]; if ( ( ch > 't' ) || ( ch < 'a' ) ) { // last character we would have to match if current character would match // is not a character from keywords -> jump behind and start over bIdx = quickTestIdx; continue; } } // could be negative - but we only compare to ASCII final byte ch = strmBuf[bIdx]; if ( ch == keyw[ charMatchCount ] ) { if ( ++charMatchCount == keyw.length ) { // match found bIdx++; break; } } else { if ( ( charMatchCount == 3 ) && ( ch == ENDOBJ[ charMatchCount ] ) ) { // maybe ENDSTREAM is missing but we could have ENDOBJ keyw = ENDOBJ; charMatchCount++; } else { // no match; incrementing match start by 1 would be dumb since we already know // matched chars depending on current char read we may already have beginning // of a new match: 'e': first char matched; 'n': if we are at match position // idx 7 we already read 'e' thus 2 chars matched for each other char we have // to start matching first keyword char beginning with next read position charMatchCount = ( ch == E ) ? 1 : ( ( ch == N ) && ( charMatchCount == 7 ) ) ? 2 : 0; // search again for 'endstream' keyw = ENDSTREAM; } } } int contentBytes = Math.max( 0, bIdx - charMatchCount ); // write buffer content until first matched char to output stream if ( contentBytes > 0 ) { out.write( strmBuf, 0, contentBytes ); } if ( charMatchCount == keyw.length ) { // keyword matched; unread matched keyword (endstream/endobj) and following buffered content source.rewind( bufSize - contentBytes ); break; } else { // copy matched chars at start of buffer System.arraycopy( keyw, 0, strmBuf, 0, charMatchCount ); } } // this writes a lonely CR or drops trailing CR LF and LF out.flush(); } private void readValidStream(OutputStream out, COSNumber streamLengthObj) throws IOException { long remainBytes = streamLengthObj.longValue(); while (remainBytes > 0) { final int chunk = (remainBytes > STREAMCOPYBUFLEN) ? STREAMCOPYBUFLEN : (int) remainBytes; final int readBytes = source.read(streamCopyBuf, 0, chunk); if (readBytes <= 0) { // shouldn't happen, the stream length has already been validated throw new IOException("read error at offset " + source.getPosition() + ": expected " + chunk + " bytes, but read() returns " + readBytes); } out.write(streamCopyBuf, 0, readBytes); remainBytes -= readBytes; } } private boolean validateStreamLength(long streamLength) throws IOException { boolean streamLengthIsValid = true; long originOffset = source.getPosition(); long expectedEndOfStream = originOffset + streamLength; if (expectedEndOfStream > fileLen) { streamLengthIsValid = false; LOG.warn("The end of the stream is out of range, using workaround to read the stream, " + "stream start position: " + originOffset + ", length: " + streamLength + ", expected end position: " + expectedEndOfStream); } else { source.seek(expectedEndOfStream); skipSpaces(); if (!isString(ENDSTREAM)) { streamLengthIsValid = false; LOG.warn("The end of the stream doesn't point to the correct offset, using workaround to read the stream, " + "stream start position: " + originOffset + ", length: " + streamLength + ", expected end position: " + expectedEndOfStream); } source.seek(originOffset); } return streamLengthIsValid; } /** * Check if the cross reference table/stream can be found at the current offset. 
* * @param startXRefOffset * @return the revised offset * @throws IOException */ private long checkXRefOffset(long startXRefOffset) throws IOException { // repair mode isn't available in non-lenient mode if (!isLenient) { return startXRefOffset; } source.seek(startXRefOffset); skipSpaces(); if (source.peek() == X && isString(XREF_TABLE)) { return startXRefOffset; } if (startXRefOffset > 0) { if (checkXRefStreamOffset(startXRefOffset)) { return startXRefOffset; } else { return calculateXRefFixedOffset(startXRefOffset, false); } } // can't find a valid offset return -1; } /** * Check if the cross reference stream can be found at the current offset. * * @param startXRefOffset the expected start offset of the XRef stream * @return the revised offset * @throws IOException if something went wrong */ private boolean checkXRefStreamOffset(long startXRefOffset) throws IOException { // repair mode isn't available in non-lenient mode if (!isLenient || startXRefOffset == 0) { return true; } // seek to offset-1 source.seek(startXRefOffset-1); int nextValue = source.read(); // the first character has to be a whitespace, and then a digit if (isWhitespace(nextValue)) { skipSpaces(); if (isDigit()) { try { // it's a XRef stream readObjectNumber(); readGenerationNumber(); readExpectedString(OBJ_MARKER, true); // check the dictionary to avoid false positives COSDictionary dict = parseCOSDictionary(); source.seek(startXRefOffset); if ("XRef".equals(dict.getNameAsString(COSName.TYPE))) { return true; } } catch (IOException exception) { // there wasn't an object of a xref stream source.seek(startXRefOffset); } } } return false; } /** * Try to find a fixed offset for the given xref table/stream. * * @param objectOffset the given offset where to look at * @param streamsOnly search for xref streams only * @return the fixed offset * * @throws IOException if something went wrong */ private long calculateXRefFixedOffset(long objectOffset, boolean streamsOnly) throws IOException { if (objectOffset < 0) { LOG.error("Invalid object offset " + objectOffset + " when searching for a xref table/stream"); return 0; } // start a brute force search for all xref tables and try to find the offset we are looking for long newOffset = bfSearchForXRef(objectOffset, streamsOnly); if (newOffset > -1) { LOG.debug("Fixed reference for xref table/stream " + objectOffset + " -> " + newOffset); return newOffset; } LOG.error("Can't find the object xref table/stream at offset " + objectOffset); return 0; } private boolean validateXrefOffsets(Map xrefOffset) throws IOException { if (xrefOffset == null) { return true; } for (Entry objectEntry : xrefOffset.entrySet()) { COSObjectKey objectKey = objectEntry.getKey(); Long objectOffset = objectEntry.getValue(); // a negative offset number represents an object number itself // see type 2 entry in xref stream if (objectOffset != null && objectOffset >= 0 && !checkObjectKey(objectKey, objectOffset)) { LOG.debug("Stop checking xref offsets as at least one (" + objectKey + ") couldn't be dereferenced"); return false; } } return true; } /** * Check the XRef table by dereferencing all objects and fixing the offset if necessary. * * @throws IOException if something went wrong. 
*/ private void checkXrefOffsets() throws IOException { // repair mode isn't available in non-lenient mode if (!isLenient) { return; } Map xrefOffset = xrefTrailerResolver.getXrefTable(); if (!validateXrefOffsets(xrefOffset)) { bfSearchForObjects(); if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty()) { LOG.debug("Replaced read xref table with the results of a brute force search"); xrefOffset.clear(); xrefOffset.putAll(bfSearchCOSObjectKeyOffsets); } } } /** * Check if the given object can be found at the given offset. * * @param objectKey the object we are looking for * @param offset the offset where to look * @return returns true if the given object can be dereferenced at the given offset * @throws IOException if something went wrong */ private boolean checkObjectKey(COSObjectKey objectKey, long offset) throws IOException { // there can't be any object at the very beginning of a pdf if (offset < MINIMUM_SEARCH_OFFSET) { return false; } boolean objectKeyFound = false; try { source.seek(offset); // try to read the given object/generation number if (objectKey.getNumber() == readObjectNumber()) { int genNumber = readGenerationNumber(); if (genNumber == objectKey.getGeneration()) { // finally try to read the object marker readExpectedString(OBJ_MARKER, true); objectKeyFound = true; } else if (isLenient && genNumber > objectKey.getGeneration()) { // finally try to read the object marker readExpectedString(OBJ_MARKER, true); objectKeyFound = true; objectKey.fixGeneration(genNumber); } } } catch (IOException exception) { // Swallow the exception, obviously there isn't any valid object number } // return resulting value return objectKeyFound; } /** * Brute force search for every object in the pdf. * * @throws IOException if something went wrong */ private void bfSearchForObjects() throws IOException { if (bfSearchCOSObjectKeyOffsets == null) { bfSearchForLastEOFMarker(); bfSearchCOSObjectKeyOffsets = new HashMap(); long originOffset = source.getPosition(); long currentOffset = MINIMUM_SEARCH_OFFSET; long lastObjectId = Long.MIN_VALUE; int lastGenID = Integer.MIN_VALUE; long lastObjOffset = Long.MIN_VALUE; char[] endobjString = "ndo".toCharArray(); char[] endobjRemainingString = "bj".toCharArray(); boolean endOfObjFound = false; do { source.seek(currentOffset); int nextChar = source.read(); currentOffset++; if (isWhitespace(nextChar) && isString(OBJ_MARKER)) { long tempOffset = currentOffset - 2; source.seek(tempOffset); int genID = source.peek(); // is the next char a digit? if (isDigit(genID)) { genID -= 48; tempOffset--; source.seek(tempOffset); if (isWhitespace()) { while (tempOffset > MINIMUM_SEARCH_OFFSET && isWhitespace()) { source.seek(--tempOffset); } boolean objectIDFound = false; while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit()) { source.seek(--tempOffset); objectIDFound = true; } if (objectIDFound) { source.read(); long objectId = readObjectNumber(); if (lastObjOffset > 0) { // add the former object ID only if there was a subsequent object ID bfSearchCOSObjectKeyOffsets .put(new COSObjectKey(lastObjectId, lastGenID), lastObjOffset); } lastObjectId = objectId; lastGenID = genID; lastObjOffset = tempOffset + 1; currentOffset += OBJ_MARKER.length - 1; endOfObjFound = false; } } } } // check for "endo" as abbreviation for "endobj", as the pdf may be cut off // in the middle of the keyword, see PDFBOX-3936. 
// We could possibly implement a more intelligent algorithm if necessary else if (nextChar == 'e' && isString(endobjString)) { currentOffset += endobjString.length; source.seek(currentOffset); if (source.isEOF()) { endOfObjFound = true; continue; } if (isString(endobjRemainingString)) { currentOffset += endobjRemainingString.length; endOfObjFound = true; continue; } } } while (currentOffset < lastEOFMarker && !source.isEOF()); if ((lastEOFMarker < Long.MAX_VALUE || endOfObjFound) && lastObjOffset > 0) { // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker // the last object id has to be added here so that it can't get lost as there isn't any subsequent // object id bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(lastObjectId, lastGenID), lastObjOffset); } // reestablish origin position source.seek(originOffset); } } /** * Search for the offset of the given xref table/stream among those found by a brute force search. * * @param streamsOnly search for xref streams only * @return the offset of the xref entry * @throws IOException if something went wrong */ private long bfSearchForXRef(long xrefOffset, boolean streamsOnly) throws IOException { long newOffset = -1; long newOffsetTable = -1; long newOffsetStream = -1; if (!streamsOnly) { bfSearchForXRefTables(); } bfSearchForXRefStreams(); if (!streamsOnly && bfSearchXRefTablesOffsets != null) { // TODO to be optimized, this won't work in every case newOffsetTable = searchNearestValue(bfSearchXRefTablesOffsets, xrefOffset); } if (bfSearchXRefStreamsOffsets != null) { // TODO to be optimized, this won't work in every case newOffsetStream = searchNearestValue(bfSearchXRefStreamsOffsets, xrefOffset); } // choose the nearest value if (newOffsetTable > -1 && newOffsetStream > -1) { long differenceTable = xrefOffset - newOffsetTable; long differenceStream = xrefOffset - newOffsetStream; if (Math.abs(differenceTable) > Math.abs(differenceStream)) { newOffset = newOffsetStream; bfSearchXRefStreamsOffsets.remove(newOffsetStream); } else { newOffset = newOffsetTable; bfSearchXRefTablesOffsets.remove(newOffsetTable); } } else if (newOffsetTable > -1) { newOffset = newOffsetTable; bfSearchXRefTablesOffsets.remove(newOffsetTable); } else if (newOffsetStream > -1) { newOffset = newOffsetStream; bfSearchXRefStreamsOffsets.remove(newOffsetStream); } return newOffset; } private long searchNearestValue(List values, long offset) { long newValue = -1; Long currentDifference = null; int currentOffsetIndex = -1; int numberOfOffsets = values.size(); // find the nearest value for (int i = 0; i < numberOfOffsets; i++) { long newDifference = offset - values.get(i); // find the nearest offset if (currentDifference == null || (Math.abs(currentDifference) > Math.abs(newDifference))) { currentDifference = newDifference; currentOffsetIndex = i; } } if (currentOffsetIndex > -1) { newValue = values.get(currentOffsetIndex); } return newValue; } /** * Brute force search for all trailer marker. 
* * @throws IOException if something went wrong */ private boolean bfSearchForTrailer(COSDictionary trailer) throws IOException { Map trailerDicts = new HashMap(); long originOffset = source.getPosition(); source.seek(MINIMUM_SEARCH_OFFSET); while (!source.isEOF()) { // search for trailer marker if (isString(TRAILER_MARKER)) { source.seek(source.getPosition() + TRAILER_MARKER.length); try { boolean rootFound = false; boolean infoFound = false; skipSpaces(); COSDictionary trailerDict = parseCOSDictionary(); StringBuilder trailerKeys = new StringBuilder(); if (trailerDict.containsKey(COSName.ROOT)) { COSBase rootObj = trailerDict.getItem(COSName.ROOT); if (rootObj instanceof COSObject) { long objNumber = ((COSObject) rootObj).getObjectNumber(); int genNumber = ((COSObject) rootObj).getGenerationNumber(); trailerKeys.append(objNumber).append(" "); trailerKeys.append(genNumber).append(" "); rootFound = true; } } if (trailerDict.containsKey(COSName.INFO)) { COSBase infoObj = trailerDict.getItem(COSName.INFO); if (infoObj instanceof COSObject) { long objNumber = ((COSObject) infoObj).getObjectNumber(); int genNumber = ((COSObject) infoObj).getGenerationNumber(); trailerKeys.append(objNumber).append(" "); trailerKeys.append(genNumber).append(" "); infoFound = true; } } if (rootFound && infoFound) { trailerDicts.put(trailerKeys.toString(), trailerDict); } } catch (IOException exception) { continue; } } source.read(); } source.seek(originOffset); // eliminate double entries int trailerdictsSize = trailerDicts.size(); String firstEntry = null; if (trailerdictsSize > 0) { String[] keys = new String[trailerdictsSize]; trailerDicts.keySet().toArray(keys); firstEntry = keys[0]; for (int i = 1; i < trailerdictsSize; i++) { if (firstEntry.equals(keys[i])) { trailerDicts.remove(keys[i]); } } } // continue if one entry is left only if (trailerDicts.size() == 1) { boolean rootFound = false; boolean infoFound = false; COSDictionary trailerDict = trailerDicts.get(firstEntry); COSBase rootObj = trailerDict.getItem(COSName.ROOT); if (rootObj instanceof COSObject) { // check if the dictionary can be dereferenced and is the one we are looking for COSDictionary rootDict = retrieveCOSDictionary((COSObject) rootObj); if (rootDict != null && isCatalog(rootDict)) { rootFound = true; } } COSBase infoObj = trailerDict.getItem(COSName.INFO); if (infoObj instanceof COSObject) { // check if the dictionary can be dereferenced and is the one we are looking for COSDictionary infoDict = retrieveCOSDictionary((COSObject) infoObj); if (infoDict != null && isInfo(infoDict)) { infoFound = true; } } if (rootFound && infoFound) { trailer.setItem(COSName.ROOT, rootObj); trailer.setItem(COSName.INFO, infoObj); if (trailerDict.containsKey(COSName.ENCRYPT)) { COSBase encObj = trailerDict.getItem(COSName.ENCRYPT); if (encObj instanceof COSObject) { // check if the dictionary can be dereferenced // TODO check if the dictionary is an encryption dictionary? COSDictionary encDict = retrieveCOSDictionary((COSObject) encObj); if (encDict != null) { trailer.setItem(COSName.ENCRYPT, encObj); } } } if (trailerDict.containsKey(COSName.ID)) { COSBase idObj = trailerDict.getItem(COSName.ID); if (idObj instanceof COSArray) { trailer.setItem(COSName.ID, idObj); } } return true; } } return false; } /** * Brute force search for the last EOF marker. 
* * @throws IOException if something went wrong */ private void bfSearchForLastEOFMarker() throws IOException { if (lastEOFMarker == null) { long originOffset = source.getPosition(); source.seek(MINIMUM_SEARCH_OFFSET); while (!source.isEOF()) { // search for EOF marker if (isString(EOF_MARKER)) { long tempMarker = source.getPosition(); source.seek(tempMarker + 5); try { // check if the following data is some valid pdf content // which most likely indicates that the pdf is linearized, // updated or just cut off somewhere in the middle skipSpaces(); if (!isString(XREF_TABLE)) { readObjectNumber(); readGenerationNumber(); } } catch (IOException exception) { // save the EOF marker as the following data is most likely some garbage lastEOFMarker = tempMarker; } } source.read(); } source.seek(originOffset); // no EOF marker found if (lastEOFMarker == null) { lastEOFMarker = Long.MAX_VALUE; } } } /** * Brute force search for all object streams. * * @throws IOException if something went wrong */ private void bfSearchForObjStreams() throws IOException { HashMap bfSearchObjStreamsOffsets = new HashMap(); long originOffset = source.getPosition(); source.seek(MINIMUM_SEARCH_OFFSET); char[] string = " obj".toCharArray(); while (!source.isEOF()) { // search for EOF marker if (isString(OBJ_STREAM)) { long currentPosition = source.getPosition(); // search backwards for the beginning of the object long newOffset = -1; COSObjectKey streamObjectKey = null; boolean objFound = false; for (int i = 1; i < 40 && !objFound; i++) { long currentOffset = currentPosition - (i * 10); if (currentOffset > 0) { source.seek(currentOffset); for (int j = 0; j < 10; j++) { if (isString(string)) { long tempOffset = currentOffset - 1; source.seek(tempOffset); int genID = source.peek(); // is the next char a digit? if (isDigit(genID)) { tempOffset--; source.seek(tempOffset); if (isSpace()) { int length = 0; source.seek(--tempOffset); while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit()) { source.seek(--tempOffset); length++; } if (length > 0) { source.read(); newOffset = source.getPosition(); long objNumber = readObjectNumber(); int genNumber = readGenerationNumber(); streamObjectKey = new COSObjectKey(objNumber, genNumber); bfSearchObjStreamsOffsets.put(newOffset, streamObjectKey); } } } LOG.debug("Dictionary start for object stream -> " + newOffset); objFound = true; break; } else { currentOffset++; source.read(); } } } } source.seek(currentPosition + OBJ_STREAM.length); } source.read(); } // add all found compressed objects to the brute force search result for (Long offset : bfSearchObjStreamsOffsets.keySet()) { Long bfOffset = bfSearchCOSObjectKeyOffsets.get(bfSearchObjStreamsOffsets.get(offset)); // incomplete object stream found? 
if (bfOffset == null) { LOG.warn("Skipped incomplete object stream:" + bfSearchObjStreamsOffsets.get(offset) + " at " + offset); continue; } // check if the object was overwritten if (offset.equals(bfOffset)) { source.seek(offset); long stmObjNumber = readObjectNumber(); int stmGenNumber = readGenerationNumber(); readExpectedString(OBJ_MARKER, true); int nrOfObjects = 0; byte[] numbersBytes = null; COSStream stream = null; COSInputStream is = null; try { COSDictionary dict = parseCOSDictionary(); int offsetFirstStream = dict.getInt(COSName.FIRST); nrOfObjects = dict.getInt(COSName.N); // skip the stream if required values are missing if (offsetFirstStream == -1 || nrOfObjects == -1) { continue; } stream = parseCOSStream(dict); if (securityHandler != null) { securityHandler.decryptStream(stream, stmObjNumber, stmGenNumber); } is = stream.createInputStream(); numbersBytes = new byte[offsetFirstStream]; is.read(numbersBytes); } catch (IOException exception) { LOG.debug( "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); continue; } finally { if (is != null) { is.close(); } if (stream != null) { stream.close(); } } int start = 0; // skip spaces while (numbersBytes[start] == 32) { start++; } String numbersStr = new String(numbersBytes, start, numbersBytes.length - start, "ISO-8859-1"); numbersStr = numbersStr.replaceAll("\n", " ").replaceAll(" ", " "); String[] numbers = numbersStr.split(" "); if (numbers.length < nrOfObjects * 2) { LOG.debug( "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); continue; } Map xrefOffset = xrefTrailerResolver.getXrefTable(); for (int i = 0; i < nrOfObjects; i++) { long objNumber = Long.parseLong(numbers[i * 2]); COSObjectKey objKey = new COSObjectKey(objNumber, 0); Long existingOffset = bfSearchCOSObjectKeyOffsets.get(objKey); if (existingOffset == null || offset > existingOffset) { bfSearchCOSObjectKeyOffsets.put(objKey, -stmObjNumber); xrefOffset.put(objKey, -stmObjNumber); } } } } source.seek(originOffset); } /** * Brute force search for all xref entries (tables). * * @throws IOException if something went wrong */ private void bfSearchForXRefTables() throws IOException { if (bfSearchXRefTablesOffsets == null) { // a pdf may contain more than one xref entry bfSearchXRefTablesOffsets = new Vector(); long originOffset = source.getPosition(); source.seek(MINIMUM_SEARCH_OFFSET); // search for xref tables while (!source.isEOF()) { if (isString(XREF_TABLE)) { long newOffset = source.getPosition(); source.seek(newOffset - 1); // ensure that we don't read "startxref" instead of "xref" if (isWhitespace()) { bfSearchXRefTablesOffsets.add(newOffset); } source.seek(newOffset + 4); } source.read(); } source.seek(originOffset); } } /** * Brute force search for all /XRef entries (streams). 
* * @throws IOException if something went wrong */ private void bfSearchForXRefStreams() throws IOException { if (bfSearchXRefStreamsOffsets == null) { // a pdf may contain more than one /XRef entry bfSearchXRefStreamsOffsets = new Vector(); long originOffset = source.getPosition(); source.seek(MINIMUM_SEARCH_OFFSET); // search for XRef streams String objString = " obj"; char[] string = objString.toCharArray(); while (!source.isEOF()) { if (isString(XREF_STREAM)) { // search backwards for the beginning of the stream long newOffset = -1; long xrefOffset = source.getPosition(); boolean objFound = false; for (int i = 1; i < 40 && !objFound; i++) { long currentOffset = xrefOffset - (i * 10); if (currentOffset > 0) { source.seek(currentOffset); for (int j = 0; j < 10; j++) { if (isString(string)) { long tempOffset = currentOffset - 1; source.seek(tempOffset); int genID = source.peek(); // is the next char a digit? if (isDigit(genID)) { tempOffset--; source.seek(tempOffset); if (isSpace()) { int length = 0; source.seek(--tempOffset); while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit()) { source.seek(--tempOffset); length++; } if (length > 0) { source.read(); newOffset = source.getPosition(); } } } LOG.debug("Fixed reference for xref stream " + xrefOffset + " -> " + newOffset); objFound = true; break; } else { currentOffset++; source.read(); } } } } if (newOffset > -1) { bfSearchXRefStreamsOffsets.add(newOffset); } source.seek(xrefOffset + 5); } source.read(); } source.seek(originOffset); } } /** * Rebuild the trailer dictionary if startxref can't be found. * * @return the rebuild trailer dictionary * * @throws IOException if something went wrong */ protected final COSDictionary rebuildTrailer() throws IOException { COSDictionary trailer = null; bfSearchForObjects(); if (bfSearchCOSObjectKeyOffsets != null) { // reset trailer resolver xrefTrailerResolver.reset(); // use the found objects to rebuild the trailer resolver xrefTrailerResolver.nextXrefObj(0, XRefType.TABLE); for (Entry entry : bfSearchCOSObjectKeyOffsets.entrySet()) { xrefTrailerResolver.setXRef(entry.getKey(), entry.getValue()); } xrefTrailerResolver.setStartxref(0); trailer = xrefTrailerResolver.getTrailer(); getDocument().setTrailer(trailer); boolean searchForObjStreamsDone = false; if (!bfSearchForTrailer(trailer)) { // search for the different parts of the trailer dictionary if (!searchForTrailerItems(trailer)) { // root entry wasn't found, maybe it is part of an object stream bfSearchForObjStreams(); searchForObjStreamsDone = true; // search again for the root entry searchForTrailerItems(trailer); } } // prepare decryption if necessary prepareDecryption(); if (!searchForObjStreamsDone) { bfSearchForObjStreams(); } } trailerWasRebuild = true; return trailer; } private boolean searchForTrailerItems(COSDictionary trailer) throws IOException { boolean rootFound = false; for (Entry entry : bfSearchCOSObjectKeyOffsets.entrySet()) { COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), entry.getValue()); if (dictionary == null) { continue; } // document catalog if (isCatalog(dictionary)) { trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey())); rootFound = true; } // info dictionary else if (isInfo(dictionary)) { trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey())); } // encryption dictionary, if existing, is lost // We can't run "Algorithm 2" from PDF specification because of missing ID } return rootFound; } private COSDictionary retrieveCOSDictionary(COSObject object) throws 
            IOException
    {
        COSObjectKey key = new COSObjectKey((COSObject) object);
        Long offset = bfSearchCOSObjectKeyOffsets.get(key);
        if (offset != null)
        {
            return retrieveCOSDictionary(key, offset);
        }
        return null;
    }

    private COSDictionary retrieveCOSDictionary(COSObjectKey key, long offset) throws IOException
    {
        COSDictionary dictionary = null;
        // handle compressed objects
        if (offset < 0)
        {
            COSObject compressedObject = document.getObjectFromPool(key);
            if (compressedObject.getObject() == null)
            {
                parseObjectStream((int) -offset);
            }
            COSBase baseObject = compressedObject.getObject();
            if (baseObject instanceof COSDictionary)
            {
                dictionary = (COSDictionary) baseObject;
            }
        }
        else
        {
            source.seek(offset);
            readObjectNumber();
            readGenerationNumber();
            readExpectedString(OBJ_MARKER, true);
            if (source.peek() != '<')
            {
                return null;
            }
            try
            {
                dictionary = parseCOSDictionary();
            }
            catch (IOException exception)
            {
                LOG.debug("Skipped object " + key + ", either it's corrupt or not a dictionary");
            }
        }
        return dictionary;
    }

    /**
     * Check if all entries of the pages dictionary are present. Those which can't be dereferenced are removed.
     * 
     * @param root the root dictionary of the pdf
     */
    protected void checkPages(COSDictionary root)
    {
        if (trailerWasRebuild && root != null)
        {
            // check if all page objects are dereferenced
            COSBase pages = root.getDictionaryObject(COSName.PAGES);
            if (pages instanceof COSDictionary)
            {
                checkPagesDictionary((COSDictionary) pages, new HashSet<COSObject>());
            }
        }
    }

    private int checkPagesDictionary(COSDictionary pagesDict, Set<COSObject> set)
    {
        // check for kids
        COSBase kids = pagesDict.getDictionaryObject(COSName.KIDS);
        int numberOfPages = 0;
        if (kids instanceof COSArray)
        {
            COSArray kidsArray = (COSArray) kids;
            List<? extends COSBase> kidsList = kidsArray.toList();
            for (COSBase kid : kidsList)
            {
                COSObject kidObject = (COSObject) kid;
                if (set.contains(kidObject))
                {
                    kidsArray.remove(kid);
                    continue;
                }
                COSBase kidBaseobject = kidObject.getObject();
                // object wasn't dereferenced -> remove it
                if (kidBaseobject.equals(COSNull.NULL))
                {
                    LOG.warn("Removed null object " + kid + " from pages dictionary");
                    kidsArray.remove(kid);
                }
                else if (kidBaseobject instanceof COSDictionary)
                {
                    COSDictionary kidDictionary = (COSDictionary) kidBaseobject;
                    COSName type = kidDictionary.getCOSName(COSName.TYPE);
                    if (COSName.PAGES.equals(type))
                    {
                        // process nested pages dictionaries
                        set.add(kidObject);
                        numberOfPages += checkPagesDictionary(kidDictionary, set);
                    }
                    else if (COSName.PAGE.equals(type))
                    {
                        // count pages
                        numberOfPages++;
                    }
                }
            }
        }
        // fix counter
        pagesDict.setInt(COSName.COUNT, numberOfPages);
        return numberOfPages;
    }

    /**
     * Tell if the dictionary is a PDF catalog. Override this for an FDF catalog.
     * 
     * @param dictionary
     * @return true if the given dictionary is a root dictionary
     */
    protected boolean isCatalog(COSDictionary dictionary)
    {
        return COSName.CATALOG.equals(dictionary.getCOSName(COSName.TYPE));
    }

    /**
     * Tell if the dictionary is an info dictionary.
     * 
     * @param dictionary
     * @return true if the given dictionary is an info dictionary
     */
    private boolean isInfo(COSDictionary dictionary)
    {
        if (dictionary.containsKey(COSName.PARENT) || dictionary.containsKey(COSName.A)
                || dictionary.containsKey(COSName.DEST))
        {
            return false;
        }
        if (!dictionary.containsKey(COSName.MOD_DATE) && !dictionary.containsKey(COSName.TITLE)
                && !dictionary.containsKey(COSName.AUTHOR)
                && !dictionary.containsKey(COSName.SUBJECT)
                && !dictionary.containsKey(COSName.KEYWORDS)
                && !dictionary.containsKey(COSName.CREATOR)
                && !dictionary.containsKey(COSName.PRODUCER)
                && !dictionary.containsKey(COSName.CREATION_DATE))
        {
            return false;
        }
        return true;
    }

    /**
     * This will parse the startxref section from the stream. The startxref value is ignored.
     *
     * @return the startxref value or -1 on parsing error
     * @throws IOException If an IO error occurs.
     */
    private long parseStartXref() throws IOException
    {
        long startXref = -1;
        if (isString(STARTXREF))
        {
            readString();
            skipSpaces();
            // This integer is the byte offset of the first object referenced by the xref or xref stream
            startXref = readLong();
        }
        return startXref;
    }

    /**
     * Checks if the given string can be found at the current offset.
     * 
     * @param string the bytes of the string to look for
     * @return true if the bytes are in place, false if not
     * @throws IOException if something went wrong
     */
    private boolean isString(byte[] string) throws IOException
    {
        boolean bytesMatching = false;
        if (source.peek() == string[0])
        {
            int length = string.length;
            byte[] bytesRead = new byte[length];
            int numberOfBytes = source.read(bytesRead, 0, length);
            while (numberOfBytes < length)
            {
                int readMore = source.read(bytesRead, numberOfBytes, length - numberOfBytes);
                if (readMore < 0)
                {
                    break;
                }
                numberOfBytes += readMore;
            }
            bytesMatching = Arrays.equals(string, bytesRead);
            source.rewind(numberOfBytes);
        }
        return bytesMatching;
    }

    /**
     * Checks if the given string can be found at the current offset.
     * 
     * @param string the bytes of the string to look for
     * @return true if the bytes are in place, false if not
     * @throws IOException if something went wrong
     */
    private boolean isString(char[] string) throws IOException
    {
        boolean bytesMatching = true;
        long originOffset = source.getPosition();
        for (char c : string)
        {
            if (source.read() != c)
            {
                bytesMatching = false;
                break;
            }
        }
        source.seek(originOffset);
        return bytesMatching;
    }

    /**
     * This will parse the trailer from the stream and add it to the state.
     *
     * @return false on parsing error
     * @throws IOException If an IO error occurs.
     */
    private boolean parseTrailer() throws IOException
    {
        // parse the last trailer.
        trailerOffset = source.getPosition();
        // PDFBOX-1739 skip extra xref entries in RegisSTAR documents
        if (isLenient)
        {
            int nextCharacter = source.peek();
            while (nextCharacter != 't' && isDigit(nextCharacter))
            {
                if (source.getPosition() == trailerOffset)
                {
                    // warn only the first time
                    LOG.warn("Expected trailer object at position " + trailerOffset
                            + ", keep trying");
                }
                readLine();
                nextCharacter = source.peek();
            }
        }
        if (source.peek() != 't')
        {
            return false;
        }
        // read "trailer"
        long currentOffset = source.getPosition();
        String nextLine = readLine();
        if (!nextLine.trim().equals("trailer"))
        {
            // in some cases the EOL is missing and the trailer immediately
            // continues with "<<" or with a blank character
            // even if this does not comply with PDF reference we want to support as many PDFs as possible
            // Acrobat reader can also deal with this.
if (nextLine.startsWith("trailer")) { // we can't just unread a portion of the read data as we don't know if the EOL consist of 1 or 2 bytes int len = "trailer".length(); // jump back right after "trailer" source.seek(currentOffset + len); } else { return false; } } // in some cases the EOL is missing and the trailer continues with " <<" // even if this does not comply with PDF reference we want to support as many PDFs as possible // Acrobat reader can also deal with this. skipSpaces(); COSDictionary parsedTrailer = parseCOSDictionary(); xrefTrailerResolver.setTrailer( parsedTrailer ); skipSpaces(); return true; } /** * Parse the header of a pdf. * * @return true if a PDF header was found * @throws IOException if something went wrong */ protected boolean parsePDFHeader() throws IOException { return parseHeader(PDF_HEADER, PDF_DEFAULT_VERSION); } /** * Parse the header of a fdf. * * @return true if a FDF header was found * @throws IOException if something went wrong */ protected boolean parseFDFHeader() throws IOException { return parseHeader(FDF_HEADER, FDF_DEFAULT_VERSION); } private boolean parseHeader(String headerMarker, String defaultVersion) throws IOException { // read first line String header = readLine(); // some pdf-documents are broken and the pdf-version is in one of the following lines if (!header.contains(headerMarker)) { header = readLine(); while (!header.contains(headerMarker)) { // if a line starts with a digit, it has to be the first one with data in it if ((header.length() > 0) && (Character.isDigit(header.charAt(0)))) { break; } header = readLine(); } } // nothing found if (!header.contains(headerMarker)) { source.seek(0); return false; } //sometimes there is some garbage in the header before the header //actually starts, so lets try to find the header first. int headerStart = header.indexOf( headerMarker ); // greater than zero because if it is zero then there is no point of trimming if ( headerStart > 0 ) { //trim off any leading characters header = header.substring( headerStart, header.length() ); } // This is used if there is garbage after the header on the same line if (header.startsWith(headerMarker) && !header.matches(headerMarker + "\\d.\\d")) { if (header.length() < headerMarker.length() + 3) { // No version number at all, set to 1.4 as default header = headerMarker + defaultVersion; LOG.debug("No version found, set to " + defaultVersion + " as default."); } else { String headerGarbage = header.substring(headerMarker.length() + 3, header.length()) + "\n"; header = header.substring(0, headerMarker.length() + 3); source.rewind(headerGarbage.getBytes(ISO_8859_1).length); } } float headerVersion = -1; try { String[] headerParts = header.split("-"); if (headerParts.length == 2) { headerVersion = Float.parseFloat(headerParts[1]); } } catch (NumberFormatException exception) { LOG.debug("Can't parse the header version.", exception); } if (headerVersion < 0) { if (isLenient) { headerVersion = 1.7f; } else { throw new IOException("Error getting header version: " + header); } } document.setVersion(headerVersion); // rewind source.seek(0); return true; } /** * This will parse the xref table from the stream and add it to the state * The XrefTable contents are ignored. * @param startByteOffset the offset to start at * @return false on parsing error * @throws IOException If an IO error occurs. 
     */
    protected boolean parseXrefTable(long startByteOffset) throws IOException
    {
        if (source.peek() != 'x')
        {
            return false;
        }
        String xref = readString();
        if (!xref.trim().equals("xref"))
        {
            return false;
        }

        // check for trailer after xref
        String str = readString();
        byte[] b = str.getBytes(ISO_8859_1);
        source.rewind(b.length);

        // signal start of new XRef
        xrefTrailerResolver.nextXrefObj(startByteOffset, XRefType.TABLE);

        if (str.startsWith("trailer"))
        {
            LOG.warn("skipping empty xref table");
            return false;
        }

        // Xref tables can have multiple sections. Each starts with a starting object id and a count.
        while (true)
        {
            String currentLine = readLine();
            String[] splitString = currentLine.split("\\s");
            if (splitString.length != 2)
            {
                LOG.warn("Unexpected XRefTable Entry: " + currentLine);
                break;
            }
            // first obj id
            long currObjID = Long.parseLong(splitString[0]);
            // the number of objects in the xref table
            int count = Integer.parseInt(splitString[1]);

            skipSpaces();
            for (int i = 0; i < count; i++)
            {
                if (source.isEOF() || isEndOfName((char) source.peek()))
                {
                    break;
                }
                if (source.peek() == 't')
                {
                    break;
                }
                // Ignore table contents
                currentLine = readLine();
                splitString = currentLine.split("\\s");
                if (splitString.length < 3)
                {
                    LOG.warn("invalid xref line: " + currentLine);
                    break;
                }
                /* This supports the corrupt table as reported in
                 * PDFBOX-474 (XXXX XXX XX n) */
                if (splitString[splitString.length - 1].equals("n"))
                {
                    try
                    {
                        long currOffset = Long.parseLong(splitString[0]);
                        int currGenID = Integer.parseInt(splitString[1]);
                        COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
                        xrefTrailerResolver.setXRef(objKey, currOffset);
                    }
                    catch (NumberFormatException e)
                    {
                        throw new IOException(e);
                    }
                }
                else if (!splitString[2].equals("f"))
                {
                    throw new IOException("Corrupt XRefTable Entry - ObjID:" + currObjID);
                }
                currObjID++;
                skipSpaces();
            }
            skipSpaces();
            if (!isDigit())
            {
                break;
            }
        }
        return true;
    }

    /**
     * Fills XRefTrailerResolver with data of given stream.
     * Stream must be of type XRef.
     * @param stream the stream to be read
     * @param objByteOffset the offset to start at
     * @param isStandalone should be set to true if the stream is not part of a hybrid xref table
     * @throws IOException if there is an error parsing the stream
     */
    private void parseXrefStream(COSStream stream, long objByteOffset, boolean isStandalone) throws IOException
    {
        // the cross reference stream of a hybrid xref table will be added to the existing one
        // and we must not override the offset and the trailer
        if (isStandalone)
        {
            xrefTrailerResolver.nextXrefObj(objByteOffset, XRefType.STREAM);
            xrefTrailerResolver.setTrailer(stream);
        }
        PDFXrefStreamParser parser = new PDFXrefStreamParser(stream, document, xrefTrailerResolver);
        parser.parse();
    }

    /**
     * This will get the document that was parsed. The document must be parsed before this is called. When you are done
     * with this document you must call close() on it to release resources.
     *
     * @return The document that was parsed.
     *
     * @throws IOException If there is an error getting the document.
     */
    public COSDocument getDocument() throws IOException
    {
        if (document == null)
        {
            throw new IOException("You must parse the document first before calling getDocument()");
        }
        return document;
    }

    /**
     * This will get the encryption dictionary. The document must be parsed before this is called.
     *
     * @return The encryption dictionary of the document that was parsed.
     *
     * @throws IOException If there is an error getting the document.
     */
    public PDEncryption getEncryption() throws IOException
    {
        if (document == null)
        {
            throw new IOException("You must parse the document first before calling getEncryption()");
        }
        return encryption;
    }

    /**
     * This will get the AccessPermission. The document must be parsed before this is called.
     *
     * @return The access permission of document that was parsed.
     *
     * @throws IOException If there is an error getting the document.
     */
    public AccessPermission getAccessPermission() throws IOException
    {
        if (document == null)
        {
            throw new IOException("You must parse the document first before calling getAccessPermission()");
        }
        return accessPermission;
    }

    /**
     * Parse the values of the trailer dictionary and return the root object.
     *
     * @param trailer The trailer dictionary.
     * @return The parsed root object.
     * @throws IOException If an IO error occurs or if the root object is missing in the trailer dictionary.
     */
    protected COSBase parseTrailerValuesDynamically(COSDictionary trailer) throws IOException
    {
        // PDFBOX-1557 - ensure that all COSObject are loaded in the trailer
        // PDFBOX-1606 - after securityHandler has been instantiated
        for (COSBase trailerEntry : trailer.getValues())
        {
            if (trailerEntry instanceof COSObject)
            {
                COSObject tmpObj = (COSObject) trailerEntry;
                parseObjectDynamically(tmpObj, false);
            }
        }
        // parse catalog or root object
        COSObject root = (COSObject) trailer.getItem(COSName.ROOT);
        if (root == null)
        {
            throw new IOException("Missing root object specification in trailer.");
        }
        return root.getObject();
    }

    /**
     * Prepare for decryption.
     * 
     * @throws InvalidPasswordException If the password is incorrect.
     * @throws IOException if something went wrong
     */
    private void prepareDecryption() throws InvalidPasswordException, IOException
    {
        if (encryption == null)
        {
            COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT);
            if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull))
            {
                if (trailerEncryptItem instanceof COSObject)
                {
                    COSObject trailerEncryptObj = (COSObject) trailerEncryptItem;
                    parseDictionaryRecursive(trailerEncryptObj);
                }
                try
                {
                    encryption = new PDEncryption(document.getEncryptionDictionary());
                    DecryptionMaterial decryptionMaterial;
                    if (keyStoreInputStream != null)
                    {
                        KeyStore ks = KeyStore.getInstance("PKCS12");
                        ks.load(keyStoreInputStream, password.toCharArray());
                        decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, password);
                    }
                    else
                    {
                        decryptionMaterial = new StandardDecryptionMaterial(password);
                    }
                    securityHandler = encryption.getSecurityHandler();
                    securityHandler.prepareForDecryption(encryption, document.getDocumentID(),
                            decryptionMaterial);
                    accessPermission = securityHandler.getCurrentAccessPermission();
                }
                catch (IOException e)
                {
                    throw e;
                }
                catch (Exception e)
                {
                    throw new IOException("Error (" + e.getClass().getSimpleName()
                            + ") while creating security handler for decryption", e);
                }
                finally
                {
                    if (keyStoreInputStream != null)
                    {
                        IOUtils.closeQuietly(keyStoreInputStream);
                    }
                }
            }
        }
    }

    /**
     * Resolves all not already parsed objects of a dictionary recursively.
     * 
     * @param dictionaryObject dictionary to be parsed
     * @throws IOException if something went wrong
     */
    private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException
    {
        parseObjectDynamically(dictionaryObject, true);
        COSDictionary dictionary = (COSDictionary) dictionaryObject.getObject();
        for (COSBase value : dictionary.getValues())
        {
            if (value instanceof COSObject)
            {
                COSObject object = (COSObject) value;
                if (object.getObject() == null)
                {
                    parseDictionaryRecursive(object);
                }
            }
        }
    }
}
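
Usage sketch (not part of the COSParser source above; a minimal, hedged illustration): applications normally reach this parser indirectly through PDDocument.load(...), which drives PDFParser, a COSParser subclass; when the xref data of a file is damaged, the rebuildTrailer()/bfSearch* recovery shown above is what repairs it. The class name and the file path below are placeholders, not anything shipped with PDFBox.

import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;

public class ParseExample
{
    public static void main(String[] args) throws IOException
    {
        // "damaged.pdf" is a placeholder path; PDDocument implements Closeable,
        // so try-with-resources releases the parser's resources when done.
        try (PDDocument document = PDDocument.load(new File("damaged.pdf")))
        {
            System.out.println("Pages: " + document.getNumberOfPages());
        }
    }
}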