com.itextpdf.kernel.pdf.PdfReader Maven / Gradle / Ivy
This file is part of the iText (R) project.
Copyright (c) 1998-2022 iText Group NV
Authors: Bruno Lowagie, Paulo Soares, et al.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License version 3
as published by the Free Software Foundation with the addition of the
following permission added to Section 15 as permitted in Section 7(a):
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program; if not, see http://www.gnu.org/licenses or write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA, 02110-1301 USA, or download the license from the following URL:
The interactive user interfaces in modified source and object code versions
of this program must display Appropriate Legal Notices, as required under
Section 5 of the GNU Affero General Public License.
In accordance with Section 7(b) of the GNU Affero General Public License,
a covered work must retain the producer line in every PDF that is created
or manipulated using iText.
You can be released from the requirements of the license by purchasing
a commercial license. Buying such a license is mandatory as soon as you
develop commercial activities involving the iText software without
disclosing the source code of your own applications.
These activities include: offering paid services to customers as an ASP,
serving PDFs on the fly in a web application, shipping iText with a closed
source product.
For more information, please contact iText Software Corp. at this
address: sales@itextpdf.com
package com.itextpdf.kernel.pdf;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.io.source.ByteBuffer;
import com.itextpdf.io.source.ByteUtils;
import com.itextpdf.io.source.IRandomAccessSource;
import com.itextpdf.io.source.PdfTokenizer;
import com.itextpdf.io.source.RASInputStream;
import com.itextpdf.io.source.RandomAccessFileOrArray;
import com.itextpdf.io.source.RandomAccessSourceFactory;
import com.itextpdf.io.source.WindowRandomAccessSource;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.kernel.exceptions.InvalidXRefPrevException;
import com.itextpdf.kernel.exceptions.MemoryLimitsAwareException;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.crypto.securityhandler.UnsupportedSecurityHandlerException;
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
import com.itextpdf.kernel.exceptions.XrefCycledReferencesException;
import com.itextpdf.kernel.pdf.filters.FilterHandlers;
import com.itextpdf.kernel.pdf.filters.IFilterHandler;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import com.itextpdf.kernel.xmp.XMPException;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Reads a PDF document.
 */
public class PdfReader implements Closeable {
/**
 * The default {@link StrictnessLevel} to be used.
 */
public static final StrictnessLevel DEFAULT_STRICTNESS_LEVEL = StrictnessLevel.LENIENT;
// Spellings of the "endstream" keyword with the possible line-break prefixes (none, LF, CRLF, CR).
private static final String endstream1 = "endstream";
private static final String endstream2 = "\nendstream";
private static final String endstream3 = "\r\nendstream";
private static final String endstream4 = "\rendstream";
// Byte forms of the "endstream" and "endobj" keywords, obtained through ByteUtils.getIsoBytes.
private static final byte[] endstream = ByteUtils.getIsoBytes("endstream");
private static final byte[] endobj = ByteUtils.getIsoBytes("endobj");
// NOTE(review): presumably toggles correction of wrong stream lengths; its use is not visible in this part of the file.
protected static boolean correctStreamLength = true;
// Set through setUnethicalReading(); when true, isOpenedWithFullPermission() reports full permission
// once the document has been read.
private boolean unethicalReading;
// Set through setMemorySavingMode().
private boolean memorySavingMode;
// Starts at the default; setStrictnessLevel() substitutes the default for a null argument.
private StrictnessLevel strictnessLevel = DEFAULT_STRICTNESS_LEVEL;
// Nearest enclosing indirect reference of the object currently being read; its object and generation
// numbers are used to set up PdfString decryption.
private PdfIndirectReference currentIndirectReference;
// Tokenizer over the document bytes; assigned in the constructor.
protected PdfTokenizer tokens;
// Decryption handler; getCryptoMode() returns -1 while this is null.
protected PdfEncryption decrypt;
// here we store only the pdfVersion that is written in the document's header,
// however it could differ from the actual pdf version that could be written in document's catalog
protected PdfVersion headerPdfVersion;
// Offset read after the startxref keyword; assigned in readXref().
protected long lastXref;
// Tokenizer position recorded in readXref() right after the startxref offset was read.
protected long eofPos;
// Trailer dictionary; readXref() assigns it from readXrefSection() (other assignments may exist outside this view).
protected PdfDictionary trailer;
// Document being read; the public accessors throw PdfException while this is null or its xref is not fully read.
protected PdfDocument pdfDocument;
// Lazily initialised by getPdfAConformanceLevel().
protected PdfAConformanceLevel pdfAConformanceLevel;
// Reader properties supplied at construction.
protected ReaderProperties properties;
// State flags reported by isEncrypted(), hasRebuiltXref(), hasHybridXref(), hasFixedXref() and hasXrefStm().
protected boolean encrypted = false;
protected boolean rebuiltXref = false;
protected boolean hybridXref = false;
protected boolean fixedXref = false;
protected boolean xrefStm = false;
/**
 * Constructs a new PdfReader.
 *
 * @param byteSource source of bytes for the reader
 * @param properties properties of the created reader
 * @throws IOException if an I/O error occurs
 */
public PdfReader(IRandomAccessSource byteSource, ReaderProperties properties) throws IOException {
    // Delegates to the three-argument constructor with closeStream = false.
    this(byteSource, properties, false);
}
/**
 * Reads and parses a PDF document.
 *
 * @param is         the {@code InputStream} containing the document. If the inputStream is an instance of
 *                   {@link RASInputStream} then the {@link IRandomAccessSource} would be extracted.
 *                   Otherwise the stream is read to the end but is not closed.
 * @param properties properties of the created reader
 * @throws IOException on error
 */
public PdfReader(InputStream is, ReaderProperties properties) throws IOException {
    // Delegates to the three-argument constructor with closeStream = true.
    this(new RandomAccessSourceFactory().extractOrCreateSource(is), properties, true);
}
/**
 * Reads and parses a PDF document.
 *
 * @param file the {@code File} containing the document.
 * @throws IOException on error
 * @throws FileNotFoundException when the specified File is not found
 */
public PdfReader(java.io.File file) throws FileNotFoundException, IOException {
/**
 * Reads and parses a PDF document using default {@link ReaderProperties}.
 *
 * @param is the {@code InputStream} containing the document. If the inputStream is an instance of
 *           {@link RASInputStream} then the {@link IRandomAccessSource} would be extracted.
 *           Otherwise the stream is read to the end but is not closed.
 * @throws IOException on error
 */
public PdfReader(InputStream is) throws IOException {
    this(is, new ReaderProperties());
}
/**
 * Reads and parses a PDF document.
 *
 * @param filename the file name of the document
 * @param properties properties of the created reader
 * @throws IOException on error
 */
public PdfReader(String filename, ReaderProperties properties) throws IOException {
new RandomAccessSourceFactory()
/**
 * Reads and parses a PDF document using default {@link ReaderProperties}.
 *
 * @param filename the file name of the document
 * @throws IOException on error
 */
public PdfReader(String filename) throws IOException {
    this(filename, new ReaderProperties());
}
/**
 * Constructs a new PdfReader over the given byte source.
 *
 * @param byteSource  source of bytes for the reader
 * @param properties  properties of the created reader
 * @param closeStream forwarded to the tokenizer that is created for {@code byteSource}
 * @throws IOException if an I/O error occurs
 */
PdfReader(IRandomAccessSource byteSource, ReaderProperties properties, boolean closeStream) throws IOException {
    this.properties = properties;
    this.tokens = getOffsetTokeniser(byteSource, closeStream);
}
/**
 * Close {@link PdfTokenizer}.
 *
 * @throws IOException on error.
 */
public void close() throws IOException {
* The iText is not responsible if you decide to change the
* value of this parameter.
* @param unethicalReading true to enable unethicalReading, false to disable it.
* By default unethicalReading is disabled.
* @return this {@link PdfReader} instance.
public PdfReader setUnethicalReading(boolean unethicalReading) {
this.unethicalReading = unethicalReading;
return this;
* Defines if memory saving mode is enabled.
* By default memory saving mode is disabled for the sake of time–memory trade-off.
* If memory saving mode is enabled, document processing might slow down, but reading will be less memory demanding.
* @param memorySavingMode true to enable memory saving mode, false to disable it.
* @return this {@link PdfReader} instance.
public PdfReader setMemorySavingMode(boolean memorySavingMode) {
this.memorySavingMode = memorySavingMode;
return this;
* Get the current {@link StrictnessLevel} of the reader.
* @return the current {@link StrictnessLevel}
public StrictnessLevel getStrictnessLevel() {
return strictnessLevel;
* Set the {@link StrictnessLevel} for the reader. If the argument is {@code null}, then
* the {@link PdfReader#DEFAULT_STRICTNESS_LEVEL} will be used.
* @param strictnessLevel the {@link StrictnessLevel} to set
* @return this {@link PdfReader} instance
public PdfReader setStrictnessLevel(StrictnessLevel strictnessLevel) {
this.strictnessLevel = strictnessLevel == null ? DEFAULT_STRICTNESS_LEVEL : strictnessLevel;
return this;
* Gets whether {@link #close()} method shall close input stream.
* @return true, if {@link #close()} method will close input stream,
* otherwise false.
public boolean isCloseStream() {
return tokens.isCloseStream();
/**
 * Sets whether {@link #close()} method shall close input stream.
 *
 * @param closeStream true, if {@link #close()} method shall close input stream,
 *                    otherwise false.
 */
public void setCloseStream(boolean closeStream) {
* If any exception generated while reading XRef section, PdfReader will try to rebuild it.
* @return true, if PdfReader rebuilt Cross-Reference section.
* @throws PdfException if the method has been invoked before the PDF document was read.
public boolean hasRebuiltXref() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
return rebuiltXref;
* Some documents contain hybrid XRef, for more information see " Compatibility with Applications
* That Do Not Support Compressed Reference Streams" in PDF 32000-1:2008 spec.
* @return true, if the document has hybrid Cross-Reference section.
* @throws PdfException if the method has been invoked before the PDF document was read.
public boolean hasHybridXref() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
return hybridXref;
* Indicates whether the document has Cross-Reference Streams.
* @return true, if the document has Cross-Reference Streams.
* @throws PdfException if the method has been invoked before the PDF document was read.
public boolean hasXrefStm() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
return xrefStm;
* If any exception generated while reading PdfObject, PdfReader will try to fix offsets of all objects.
* This method's returned value might change over time, because PdfObjects reading
* can be postponed even up to document closing.
* @return true, if PdfReader fixed offsets of PdfObjects.
* @throws PdfException if the method has been invoked before the PDF document was read.
public boolean hasFixedXref() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
return fixedXref;
* Gets position of the last Cross-Reference table.
* @return -1 if Cross-Reference table has rebuilt, otherwise position of the last Cross-Reference table.
* @throws PdfException if the method has been invoked before the PDF document was read.
public long getLastXref() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
return lastXref;
* Reads, decrypt and optionally decode stream bytes.
* Note, this method doesn't store actual bytes in any internal structures.
* @param stream a {@link PdfStream} stream instance to be read and optionally decoded.
* @param decode true if to get decoded stream bytes, false if to leave it originally encoded.
* @return byte[] array.
* @throws IOException on error.
public byte[] readStreamBytes(PdfStream stream, boolean decode) throws IOException {
byte[] b = readStreamBytesRaw(stream);
if (decode && b != null) {
return decodeBytes(b, stream);
} else {
return b;
/**
 * Reads and decrypts stream bytes.
 * Note, this method doesn't store actual bytes in any internal structures.
 *
 * @param stream a {@link PdfStream} stream instance to be read
 * @return byte[] array.
 * @throws IOException on error.
 */
public byte[] readStreamBytesRaw(PdfStream stream) throws IOException {
PdfName type = stream.getAsName(PdfName.Type);
if (!PdfName.XRef.equals(type) && !PdfName.ObjStm.equals(type)) {
long offset = stream.getOffset();
if (offset <= 0)
return null;
int length = stream.getLength();
if (length <= 0)
return new byte[0];
RandomAccessFileOrArray file = tokens.getSafeFile();
byte[] bytes = null;
try {
bytes = new byte[length];
boolean embeddedStream = pdfDocument.doesStreamBelongToEmbeddedFile(stream);
if (decrypt != null && (!decrypt.isEmbeddedFilesOnly() || embeddedStream)) {
PdfObject filter = stream.get(PdfName.Filter, true);
boolean skip = false;
if (filter != null) {
if (filter.isFlushed()) {
if (PdfName.Crypt.equals(filter)) {
skip = true;
} else if (filter.getType() == PdfObject.ARRAY) {
PdfArray filters = (PdfArray) filter;
for (int k = 0; k < filters.size(); k++) {
if (filters.get(k).isFlushed()) {
if (!filters.isEmpty() && PdfName.Crypt.equals(filters.get(k, true))) {
skip = true;
if (!skip) {
decrypt.setHashKeyForNextObject(stream.getIndirectReference().getObjNumber(), stream.getIndirectReference().getGenNumber());
bytes = decrypt.decryptByteArray(bytes);
} finally {
try {
} catch (Exception ignored) {
return bytes;
* Reads, decrypts and optionally decodes stream bytes into {@link ByteArrayInputStream}.
* User is responsible for closing returned stream.
* @param stream a {@link PdfStream} stream instance to be read
* @param decode true if to get decoded stream, false if to leave it originally encoded.
* @return InputStream or {@code null} if reading was failed.
* @throws IOException on error.
public InputStream readStream(PdfStream stream, boolean decode) throws IOException {
byte[] bytes = readStreamBytes(stream, decode);
return bytes != null ? new ByteArrayInputStream(bytes) : null;
* Decode bytes applying the filters specified in the provided dictionary using default filter handlers.
* @param b the bytes to decode
* @param streamDictionary the dictionary that contains filter information
* @return the decoded bytes
* @throws PdfException if there are any problems decoding the bytes
public static byte[] decodeBytes(byte[] b, PdfDictionary streamDictionary) {
return decodeBytes(b, streamDictionary, FilterHandlers.getDefaultFilterHandlers());
/**
 * Decode a byte[] applying the filters specified in the provided dictionary using the provided filter handlers.
 *
 * @param b the bytes to decode
 * @param streamDictionary the dictionary that contains filter information
 * @param filterHandlers the map used to look up a handler for each type of filter
 * @return the decoded bytes
 * @throws PdfException if there are any problems decoding the bytes
 */
public static byte[] decodeBytes(byte[] b, PdfDictionary streamDictionary, Map filterHandlers) {
if (b == null) {
return null;
PdfObject filter = streamDictionary.get(PdfName.Filter);
PdfArray filters = new PdfArray();
if (filter != null) {
if (filter.getType() == PdfObject.NAME) {
} else if (filter.getType() == PdfObject.ARRAY) {
filters = ((PdfArray) filter);
MemoryLimitsAwareHandler memoryLimitsAwareHandler = null;
if (null != streamDictionary.getIndirectReference()) {
memoryLimitsAwareHandler = streamDictionary.getIndirectReference().getDocument().memoryLimitsAwareHandler;
final boolean memoryLimitsAwarenessRequired = null != memoryLimitsAwareHandler &&
if(memoryLimitsAwarenessRequired) {
PdfArray dp = new PdfArray();
PdfObject dpo = streamDictionary.get(PdfName.DecodeParms);
if (dpo == null || (dpo.getType() != PdfObject.DICTIONARY && dpo.getType() != PdfObject.ARRAY)) {
if (dpo != null) dpo.release();
dpo = streamDictionary.get(PdfName.DP);
if (dpo != null) {
if (dpo.getType() == PdfObject.DICTIONARY) {
} else if (dpo.getType() == PdfObject.ARRAY) {
dp = ((PdfArray) dpo);
for (int j = 0; j < filters.size(); ++j) {
PdfName filterName = (PdfName) filters.get(j);
IFilterHandler filterHandler = filterHandlers.get(filterName);
if (filterHandler == null)
throw new PdfException(KernelExceptionMessageConstant.THIS_FILTER_IS_NOT_SUPPORTED)
PdfDictionary decodeParams;
if (j < dp.size()) {
PdfObject dpEntry = dp.get(j, true);
if (dpEntry == null || dpEntry.getType() == PdfObject.NULL) {
decodeParams = null;
} else if (dpEntry.getType() == PdfObject.DICTIONARY) {
decodeParams = (PdfDictionary) dpEntry;
} else {
throw new PdfException(KernelExceptionMessageConstant.THIS_DECODE_PARAMETER_TYPE_IS_NOT_SUPPORTED)
} else {
decodeParams = null;
b = filterHandler.decode(b, filterName, decodeParams, streamDictionary);
if (memoryLimitsAwarenessRequired) {
if (memoryLimitsAwarenessRequired) {
return b;
* Gets a new file instance of the original PDF
* document.
* @return a new file instance of the original PDF document
public RandomAccessFileOrArray getSafeFile() {
return tokens.getSafeFile();
* Provides the size of the opened file.
* @return The size of the opened file.
public long getFileLength() {
return tokens.getSafeFile().length();
* Checks if the document was opened with the owner password so that the end application
* can decide what level of access restrictions to apply. If the document is not encrypted
* it will return {@code true}.
* @return {@code true} if the document was opened with the owner password or if it's not encrypted,
* {@code false} if the document was opened with the user password.
* @throws PdfException if the method has been invoked before the PDF document was read.
public boolean isOpenedWithFullPermission() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
return !encrypted || decrypt.isOpenedWithFullPermission() || unethicalReading;
* Gets the encryption permissions. It can be used directly in
* {@link WriterProperties#setStandardEncryption(byte[], byte[], int, int)}.
* See ISO 32000-1, Table 22 for more details.
* @return the encryption permissions, an unsigned 32-bit quantity.
* @throws PdfException if the method has been invoked before the PDF document was read.
public long getPermissions() {
/* !pdfDocument.getXref().isReadingCompleted() can be used for encryption properties as well,
* because decrypt object is initialized in private readDecryptObj method which is called in our code
* in the next line after the setting isReadingCompleted line. This means that there's no way for users
* when this method would work incorrectly right now.
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
long perm = 0;
if (encrypted && decrypt.getPermissions() != null) {
perm = (long) decrypt.getPermissions();
return perm;
* Gets encryption algorithm and access permissions.
* @return {@code int} value corresponding to a certain type of encryption.
* @see EncryptionConstants
* @throws PdfException if the method has been invoked before the PDF document was read.
public int getCryptoMode() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
if (decrypt == null)
return -1;
return decrypt.getCryptoMode();
/**
 * Gets the declared PDF/A conformance level of the source document that is being read.
 * Note that this information is provided via XMP metadata and is not verified by iText.
 * {@link PdfReader#pdfAConformanceLevel} is lazy initialized.
 * It will be initialized during the first call of this method.
 *
 * @return conformance level of the source document, or {@code null} if no PDF/A
 * conformance level information is specified.
 */
public PdfAConformanceLevel getPdfAConformanceLevel() {
if (pdfAConformanceLevel == null) {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
if (pdfDocument.getXmpMetadata() != null) {
try {
pdfAConformanceLevel = PdfAConformanceLevel.getConformanceLevel(
} catch (XMPException ignored) {
return pdfAConformanceLevel;
* Computes user password if standard encryption handler is used with Standard40, Standard128 or AES128 encryption algorithm.
* @return user password, or null if not a standard encryption handler was used or if ownerPasswordUsed wasn't use to open the document.
* @throws PdfException if the method has been invoked before the PDF document was read.
public byte[] computeUserPassword() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
if (!encrypted || !decrypt.isOpenedWithFullPermission()) {
return null;
return decrypt.computeUserPassword(properties.password);
* Gets original file ID, the first element in {@link PdfName#ID} key of trailer.
* If the size of ID array does not equal 2, an empty array will be returned.
* The returned value reflects the value that was written in opened document. If document is modified,
* the ultimate document id can be retrieved from {@link PdfDocument#getOriginalDocumentId()}.
* @return byte array represents original file ID.
* @see PdfDocument#getOriginalDocumentId()
* @throws PdfException if the method has been invoked before the PDF document was read.
public byte[] getOriginalFileId() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
PdfArray id = trailer.getAsArray(PdfName.ID);
if (id != null && id.size() == 2) {
return ByteUtils.getIsoBytes(id.getAsString(0).getValue());
} else {
return new byte[0];
* Gets modified file ID, the second element in {@link PdfName#ID} key of trailer.
* If the size of ID array does not equal 2, an empty array will be returned.
* The returned value reflects the value that was written in opened document. If document is modified,
* the ultimate document id can be retrieved from {@link PdfDocument#getModifiedDocumentId()}.
* @return byte array represents modified file ID.
* @see PdfDocument#getModifiedDocumentId()
* @throws PdfException if the method has been invoked before the PDF document was read.
public byte[] getModifiedFileId() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
PdfArray id = trailer.getAsArray(PdfName.ID);
if (id != null && id.size() == 2) {
return ByteUtils.getIsoBytes(id.getAsString(1).getValue());
} else {
return new byte[0];
* Checks if the {@link PdfDocument} read with this {@link PdfReader} is encrypted.
* @return {@code true} is the document is encrypted, otherwise {@code false}.
* @throws PdfException if the method has been invoked before the PDF document was read.
public boolean isEncrypted() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
return encrypted;
/**
 * Parses the entire PDF
 *
 * @throws IOException if an I/O error occurs.
 */
protected void readPdf() throws IOException {
String version = tokens.checkPdfHeader();
try {
this.headerPdfVersion = PdfVersion.fromString(version);
} catch (IllegalArgumentException exc) {
throw new PdfException(KernelExceptionMessageConstant.PDF_VERSION_IS_NOT_VALID, version);
try {
} catch (XrefCycledReferencesException | MemoryLimitsAwareException | InvalidXRefPrevException ex) {
// Throws an exception when xref stream has cycled references(due to lack of opportunity to fix such an
// issue) or xref tables have cycled references and PdfReader.StrictnessLevel set to CONSERVATIVE.
// Also throw an exception when xref structure size exceeds jvm memory limit.
throw ex;
} catch (RuntimeException ex) {
if (StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
Logger logger = LoggerFactory.getLogger(PdfReader.class);
logger.error(IoLogMessageConstant.XREF_ERROR_WHILE_READING_TABLE_WILL_BE_REBUILT, ex);
} else {
throw ex;
protected void readObjectStream(PdfStream objectStream) throws IOException {
int objectStreamNumber = objectStream.getIndirectReference().getObjNumber();
int first = objectStream.getAsNumber(PdfName.First).intValue();
int n = objectStream.getAsNumber(PdfName.N).intValue();
byte[] bytes = readStreamBytes(objectStream, true);
PdfTokenizer saveTokens = tokens;
try {
tokens = new PdfTokenizer(new RandomAccessFileOrArray(new RandomAccessSourceFactory().createSource(bytes)));
int[] address = new int[n];
int[] objNumber = new int[n];
boolean ok = true;
for (int k = 0; k < n; ++k) {
ok = tokens.nextToken();
if (!ok)
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
ok = false;
objNumber[k] = tokens.getIntValue();
ok = tokens.nextToken();
if (!ok)
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
ok = false;
address[k] = tokens.getIntValue() + first;
if (!ok)
throw new PdfException(KernelExceptionMessageConstant.ERROR_WHILE_READING_OBJECT_STREAM);
for (int k = 0; k < n; ++k) {
PdfObject obj;
PdfIndirectReference reference = pdfDocument.getXref().get(objNumber[k]);
if (reference.refersTo != null || reference.getObjStreamNumber() != objectStreamNumber) {
// We skip reading of objects stream's element k if either it is already available in xref
// or if corresponding indirect object reference points to a different object stream.
// The first check prevents from re-initializing objects which are already read. One of the cases
// when this can happen is that some other object from this objects stream was released and requested
// to be re-read.
// Second check ensures that object has no incremental updates and is not freed in append mode.
if (tokens.getTokenType() == PdfTokenizer.TokenType.Number) {
// This ensure that we don't even try to read as indirect reference token (two numbers and "R")
// which are forbidden in object streams.
obj = new PdfNumber(tokens.getByteContent());
} else {
obj = readObject(false, true);
} finally {
tokens = saveTokens;
protected PdfObject readObject(PdfIndirectReference reference) {
return readObject(reference, true);
protected PdfObject readObject(boolean readAsDirect) throws IOException {
return readObject(readAsDirect, false);
protected PdfObject readReference(boolean readAsDirect) {
int num = tokens.getObjNr();
if (num < 0) {
return createPdfNullInstance(readAsDirect);
PdfXrefTable table = pdfDocument.getXref();
PdfIndirectReference reference = table.get(num);
if (reference != null) {
if (reference.isFree()) {
Logger logger = LoggerFactory.getLogger(PdfReader.class);
logger.warn(MessageFormatUtil.format(IoLogMessageConstant.INVALID_INDIRECT_REFERENCE, tokens.getObjNr(),
return createPdfNullInstance(readAsDirect);
if (reference.getGenNumber() != tokens.getGenNr()) {
if (fixedXref) {
Logger logger = LoggerFactory.getLogger(PdfReader.class);
MessageFormatUtil.format(IoLogMessageConstant.INVALID_INDIRECT_REFERENCE, tokens.getObjNr(),
return createPdfNullInstance(readAsDirect);
} else {
throw new PdfException(KernelExceptionMessageConstant.INVALID_INDIRECT_REFERENCE,
MessageFormatUtil.format("{0} {1} R", reference.getObjNumber(), reference.getGenNumber()));
} else {
if (table.isReadingCompleted()) {
Logger logger = LoggerFactory.getLogger(PdfReader.class);
logger.warn(MessageFormatUtil.format(IoLogMessageConstant.INVALID_INDIRECT_REFERENCE, tokens.getObjNr(),
return createPdfNullInstance(readAsDirect);
} else {
reference = table.add((PdfIndirectReference) new PdfIndirectReference(pdfDocument,
num, tokens.getGenNr(), 0).setState(PdfObject.READING));
return reference;
protected PdfObject readObject(boolean readAsDirect, boolean objStm) throws IOException {
PdfTokenizer.TokenType type = tokens.getTokenType();
switch (type) {
case StartDic: {
PdfDictionary dict = readDictionary(objStm);
long pos = tokens.getPosition();
// be careful in the trailer. May not be a "next" token.
boolean hasNext;
do {
hasNext = tokens.nextToken();
} while (hasNext && tokens.getTokenType() == PdfTokenizer.TokenType.Comment);
if (hasNext && tokens.tokenValueEqualsTo(PdfTokenizer.Stream)) {
//skip whitespaces
int ch;
do {
ch = tokens.read();
} while (ch == 32 || ch == 9 || ch == 0 || ch == 12);
if (ch != '\n') {
ch = tokens.read();
if (ch != '\n') {
PdfStream pdfStream = new PdfStream(tokens.getPosition(), dict);
tokens.seek(pdfStream.getOffset() + pdfStream.getLength());
return pdfStream;
} else {
return dict;
case StartArray:
return readArray(objStm);
case Number:
return new PdfNumber(tokens.getByteContent());
case String: {
PdfString pdfString = new PdfString(tokens.getByteContent(), tokens.isHexString());
if (encrypted && !decrypt.isEmbeddedFilesOnly() && !objStm) {
pdfString.setDecryption(currentIndirectReference.getObjNumber(), currentIndirectReference.getGenNumber(), decrypt);
return pdfString;
case Name:
return readPdfName(readAsDirect);
case Ref:
return readReference(readAsDirect);
case EndOfFile:
throw new PdfException(KernelExceptionMessageConstant.UNEXPECTED_END_OF_FILE);
if (tokens.tokenValueEqualsTo(PdfTokenizer.Null)) {
return createPdfNullInstance(readAsDirect);
} else if (tokens.tokenValueEqualsTo(PdfTokenizer.True)) {
if (readAsDirect) {
return PdfBoolean.TRUE;
} else {
return new PdfBoolean(true);
} else if (tokens.tokenValueEqualsTo(PdfTokenizer.False)) {
if (readAsDirect) {
return PdfBoolean.FALSE;
} else {
return new PdfBoolean(false);
return null;
protected PdfName readPdfName(boolean readAsDirect) {
if (readAsDirect) {
PdfName cachedName = PdfName.staticNames.get(tokens.getStringValue());
if (cachedName != null)
return cachedName;
// an indirect name (how odd...), or a non-standard one
return new PdfName(tokens.getByteContent());
protected PdfDictionary readDictionary(boolean objStm) throws IOException {
PdfDictionary dic = new PdfDictionary();
while (true) {
if (tokens.getTokenType() == PdfTokenizer.TokenType.EndDic) {
if (tokens.getTokenType() != PdfTokenizer.TokenType.Name) {
KernelExceptionMessageConstant.THIS_DICTIONARY_KEY_IS_NOT_A_NAME, tokens.getStringValue());
PdfName name = readPdfName(true);
PdfObject obj = readObject(true, objStm);
if (obj == null) {
if (tokens.getTokenType() == PdfTokenizer.TokenType.EndDic)
format(KernelExceptionMessageConstant.UNEXPECTED_TOKEN, ">>"));
if (tokens.getTokenType() == PdfTokenizer.TokenType.EndArray)
format(KernelExceptionMessageConstant.UNEXPECTED_TOKEN, "]"));
dic.put(name, obj);
return dic;
protected PdfArray readArray(boolean objStm) throws IOException {
PdfArray array = new PdfArray();
while (true) {
PdfObject obj = readObject(true, objStm);
if (obj == null) {
if (tokens.getTokenType() != PdfTokenizer.TokenType.EndArray) {
return array;
/**
 * Reads the cross-reference information of the document.
 * <p>
 * Visible steps: check that the current token is the {@code startxref} keyword and that a number
 * token is present; store that number in {@code lastXref} and the tokenizer position in
 * {@code eofPos}; try {@link #readXrefStream(long)}; read a table and its trailer with
 * {@link #readXrefSection()}; follow the trailer's {@code Prev} entries while remembering the
 * offsets already seen so that a cycle is detected; finally require a {@code Size} entry in the
 * trailer.
 * <p>
 * NOTE(review): several statements (tokenizer advances and seeks, loop exits, early returns and
 * exception arguments) are not visible in this copy - confirm against the upstream source.
 *
 * @throws IOException declared by this method and by the xref readers it calls
 */
protected void readXref() throws IOException {
// The startxref keyword is required.
if (!tokens.tokenValueEqualsTo(PdfTokenizer.Startxref)) {
throw new PdfException(KernelExceptionMessageConstant.PDF_STARTXREF_NOT_FOUND, tokens);
// A number token is required; its value is the offset used below.
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
throw new PdfException(KernelExceptionMessageConstant.PDF_STARTXREF_IS_NOT_FOLLOWED_BY_A_NUMBER, tokens);
long startxref = tokens.getLongValue();
lastXref = startxref;
eofPos = tokens.getPosition();
// First attempt: read the data at that offset as a cross-reference stream.
try {
if (readXrefStream(startxref)) {
xrefStm = true;
} catch (XrefCycledReferencesException
| MemoryLimitsAwareException
| InvalidXRefPrevException exceptionWhileReadingXrefStream) {
// These three exception types are rethrown unchanged.
throw exceptionWhileReadingXrefStream;
} catch (Exception ignored) {
// Do nothing.
// clear xref because of possible issues at reading xref stream.
trailer = readXrefSection();
// Prev key - integer value.
// (Present only if the file has more than one cross-reference section; shall be an indirect reference).
// The byte offset in the decoded stream from the beginning of the file
// to the beginning of the previous cross-reference section.
PdfDictionary trailer2 = trailer;
// Offsets reached through Prev so far; consulted below to detect a cycle.
// NOTE(review): declared as a raw Set here although the lookups use long offsets - the
// element type may have been lost from this copy.
final Set alreadyVisitedXrefTables = new HashSet<>();
while (true) {
PdfNumber prev = getXrefPrev(trailer2.get(PdfName.Prev, false));
if (prev == null) {
long prevXrefOffset = prev.longValue();
// The same Prev offset showing up again means the sections reference each other in a loop.
if (alreadyVisitedXrefTables.contains(prevXrefOffset)) {
if (StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
// Throw the exception to rebuild xref table, it'll be caught in method above.
throw new PdfException(KernelExceptionMessageConstant.
} else {
throw new XrefCycledReferencesException(
startxref = prevXrefOffset;
trailer2 = readXrefSection();
// The check is made on the trailer field, not on trailer2 obtained while following Prev.
Integer xrefSize = trailer.getAsInt(PdfName.Size);
if (xrefSize == null) {
throw new PdfException(KernelExceptionMessageConstant.INVALID_XREF_TABLE);
/**
 * Reads one cross-reference table section and the trailer dictionary that follows it.
 * <p>
 * Visible steps: check for the {@code xref} keyword; for every subsection read a start object
 * number and an entry count, then one position/generation pair per entry, comparing it with what
 * the document's {@link PdfXrefTable} already holds for that object number; read the trailer with
 * {@code readObject(false)}; when the trailer has a numeric {@code XRefStm} entry, set
 * {@code xrefStm} and {@code hybridXref}.
 * <p>
 * NOTE(review): the bodies of most branches (error throws, table updates, loop exits, the call
 * that reads the referenced xref stream) are not visible in this copy - confirm against the
 * upstream source.
 *
 * @return the trailer dictionary read after the table
 * @throws IOException declared by this method; an {@code IOException} caught around the
 *                     {@code XRefStm} handling is rethrown unchanged
 */
protected PdfDictionary readXrefSection() throws IOException {
if (!tokens.tokenValueEqualsTo(PdfTokenizer.Xref))
PdfXrefTable xref = pdfDocument.getXref();
while (true) {
// The trailer keyword is tested before a subsection header is expected.
if (tokens.tokenValueEqualsTo(PdfTokenizer.Trailer)) {
// Subsection header: a number giving the first object number ...
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
int start = tokens.getIntValue();
// ... followed by a number giving the entry count, turned into an exclusive upper bound.
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
int end = tokens.getIntValue() + start;
for (int num = start; num < end; num++) {
// Each entry provides a position and a generation number.
long pos = tokens.getLongValue();
int gen = tokens.getIntValue();
if (pos == 0L && gen == 65535 && num == 1 && start != 0) {
// Very rarely can an XREF have an incorrect start number. (SUP-1557)
// e.g.
// xref
// 1 13
// 0000000000 65535 f
// 0000000009 00000 n
// 0000215136 00000 n
// [...]
// Because of how iText reads (and initializes) the XREF, this will lead to the XREF having two 0000 65535 entries.
// This throws off the parsing and other operations you'd like to perform.
// To fix this we reset our index and decrease the limit when we've encountered the magic entry at position 1.
num = 0;
PdfIndirectReference reference = xref.get(num);
// True when the table already holds a reference for num that is in READING state and has the same generation.
boolean refReadingState = reference != null && reference.checkState(PdfObject.READING) && reference.getGenNumber() == gen;
// for references that are added by xref table itself (like 0 entry)
boolean refFirstEncountered = reference == null
|| !refReadingState && reference.getDocument() == null;
if (refFirstEncountered) {
reference = new PdfIndirectReference(pdfDocument, num, gen, pos);
} else if (refReadingState) {
} else {
// The keyword after the two numbers is compared with PdfTokenizer.N and PdfTokenizer.F
// (in the PDF xref table format, n marks an in-use entry and f a free one).
if (tokens.tokenValueEqualsTo(PdfTokenizer.N)) {
if (pos == 0) {
} else if (tokens.tokenValueEqualsTo(PdfTokenizer.F)) {
if (refFirstEncountered) {
} else {
if (refFirstEncountered) {
// What follows the table is read as the trailer dictionary.
PdfDictionary trailer = (PdfDictionary) readObject(false);
PdfObject xrs = trailer.get(PdfName.XRefStm);
// A numeric XRefStm entry switches on both the xref-stream and the hybrid flag.
if (xrs != null && xrs.getType() == PdfObject.NUMBER) {
int loc = ((PdfNumber) xrs).intValue();
try {
xrefStm = true;
hybridXref = true;
} catch (IOException e) {
throw e;
return trailer;
/**
 * Reads cross-reference streams, starting with the one addressed by {@code ptr} and continuing
 * with the offset taken from each stream's {@code Prev} entry until that offset is {@code -1}.
 * <p>
 * For every stream the method expects the tokens {@code number number obj}, reads the object,
 * requires it to be a stream whose {@code Type} is {@code XRef}, and then decodes its bytes into
 * entries using the three field widths from {@code W} and the (start, count) pairs from
 * {@code Index} (defaulting to {@code [0 Size]}).
 * <p>
 * NOTE(review): the seek to {@code ptr}, the {@code break} statements of the {@code switch},
 * the statements that store references in the table and the bookkeeping of visited offsets are
 * not visible in this copy - confirm against the upstream source.
 *
 * @param ptr offset of the first cross-reference stream to read; {@code -1} skips the loop
 * @return {@code false} as soon as the data does not look like a cross-reference stream object,
 *         {@code true} once the loop has finished
 * @throws IOException declared by this method; presumably propagated from the token reads
 */
protected boolean readXrefStream(long ptr) throws IOException {
// Offsets already handled; consulted at the end of each pass to detect a Prev cycle.
// NOTE(review): declared as a raw Set here - the element type may have been lost from this copy.
final Set alreadyVisitedXrefStreams = new HashSet<>();
while (ptr != -1) {
// Expected object header: <number> <number> obj. Anything else means "not an xref stream".
if (!tokens.nextToken()) {
return false;
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
return false;
if (!tokens.nextToken() || tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
return false;
if (!tokens.nextToken() || !tokens.tokenValueEqualsTo(PdfTokenizer.Obj)) {
return false;
PdfXrefTable xref = pdfDocument.getXref();
PdfObject object = readObject(false);
PdfStream xrefStream;
// Only a stream object whose Type entry equals XRef is accepted.
if (object.getType() == PdfObject.STREAM) {
xrefStream = (PdfStream) object;
if (!PdfName.XRef.equals(xrefStream.get(PdfName.Type))) {
return false;
} else {
return false;
// A trailer dictionary is created only if none exists yet.
if (trailer == null) {
trailer = new PdfDictionary();
int size = ((PdfNumber) xrefStream.get(PdfName.Size)).intValue();
PdfArray index;
PdfObject obj = xrefStream.get(PdfName.Index);
// Without an Index entry a single subsection [0 size] is assumed.
if (obj == null) {
index = new PdfArray();
index.add(new PdfNumber(0));
index.add(new PdfNumber(size));
} else {
index = (PdfArray) obj;
PdfArray w = xrefStream.getAsArray(PdfName.W);
// prev stays -1 (which ends the outer loop) unless the stream has a usable Prev entry.
long prev = -1;
obj = getXrefPrev(xrefStream.get(PdfName.Prev, false));
if (obj != null)
prev = ((PdfNumber) obj).longValue();
byte[] b = readStreamBytes(xrefStream, true);
// bptr is the read cursor into b; wc holds the byte widths of the three entry fields.
int bptr = 0;
int[] wc = new int[3];
for (int k = 0; k < 3; ++k) {
wc[k] = w.getAsNumber(k).intValue();
// Index is consumed in pairs: first object number, then number of entries.
for (int idx = 0; idx < index.size(); idx += 2) {
int start = index.getAsNumber(idx).intValue();
int length = index.getAsNumber(idx + 1).intValue();
xref.setCapacity(start + length);
while (length-- > 0) {
// The type is 1 when the first field has width 0; otherwise it is read from the data.
int type = 1;
if (wc[0] > 0) {
type = 0;
// Each field is accumulated byte by byte, most significant byte first.
for (int k = 0; k < wc[0]; ++k) {
type = (type << 8) + (b[bptr++] & 0xff);
long field2 = 0;
for (int k = 0; k < wc[1]; ++k) {
field2 = (field2 << 8) + (b[bptr++] & 0xff);
int field3 = 0;
for (int k = 0; k < wc[2]; ++k) {
field3 = (field3 << 8) + (b[bptr++] & 0xff);
int base = start;
PdfIndirectReference newReference;
// Type 0: reference built from (base, field3, field2) and put into FREE state.
// Type 1: reference built from (base, field3, field2).
// Type 2: reference built from (base, 0, field3) with field2 as its object stream number.
// The INVALID_XREF_STREAM exception below presumably belongs to a default branch whose
// label is not visible here.
switch (type) {
case 0:
newReference = (PdfIndirectReference) new PdfIndirectReference(pdfDocument, base, field3, field2).setState(PdfObject.FREE);
case 1:
newReference = new PdfIndirectReference(pdfDocument, base, field3, field2);
case 2:
newReference = new PdfIndirectReference(pdfDocument, base, 0, field3);
newReference.setObjStreamNumber((int) field2);
throw new PdfException(KernelExceptionMessageConstant.INVALID_XREF_STREAM);
PdfIndirectReference reference = xref.get(base);
// True when the table already holds a reference for base that is in READING state and has the same generation.
boolean refReadingState = reference != null && reference.checkState(PdfObject.READING) && reference.getGenNumber() == newReference.getGenNumber();
// for references that are added by xref table itself (like 0 entry)
boolean refFirstEncountered = reference == null
|| !refReadingState && reference.getDocument() == null;
if (refFirstEncountered) {
} else if (refReadingState) {
// Continue with the previous stream; an offset that was already handled is a cycle.
ptr = prev;
if (alreadyVisitedXrefStreams.contains(ptr)) {
throw new XrefCycledReferencesException(
return true;
/**
 * Sets {@code fixedXref} and scans the input line by line for lines that start an object,
 * looking each one up in the document's {@link PdfXrefTable}.
 * <p>
 * Every line is read into a 24-byte-capacity {@link ByteBuffer}; a second tokenizer is layered
 * over that same buffer so that {@code PdfTokenizer.checkObjectStart} can parse the line.
 * <p>
 * NOTE(review): the loop exit, the handling of a {@code null} result and what is done with a
 * matching reference are not visible in this copy - confirm against the upstream source.
 *
 * @throws IOException declared by this method; presumably propagated from the line reads
 */
protected void fixXref() throws IOException {
fixedXref = true;
PdfXrefTable xref = pdfDocument.getXref();
ByteBuffer buffer = new ByteBuffer(24);
PdfTokenizer lineTokeniser = new PdfTokenizer(new RandomAccessFileOrArray(new ReusableRandomAccessSource(buffer)));
for (; ; ) {
// Position taken before the line is read, i.e. where the line starts.
long pos = tokens.getPosition();
// added boolean because of mailing list issue (17 Feb. 2014)
if (!tokens.readLineSegment(buffer, true))
// Only lines whose first byte is an ASCII digit can start an object.
if (buffer.get(0) >= '0' && buffer.get(0) <= '9') {
int[] obj = PdfTokenizer.checkObjectStart(lineTokeniser);
if (obj == null)
// obj[0] is used as the object number, obj[1] as the generation number.
int num = obj[0];
int gen = obj[1];
PdfIndirectReference reference = xref.get(num);
// Only an existing reference with the same generation number is considered.
if (reference != null && reference.getGenNumber() == gen) {
/**
 * Rebuilds the cross-reference table by scanning the input line by line.
 * <p>
 * The method clears {@code xrefStm} and {@code hybridXref}, sets {@code rebuiltXref}, resets
 * {@code trailer} to {@code null} and then inspects every line: a line starting with {@code 't'}
 * that passes {@code PdfTokenizer.checkTrailer} is followed by an attempt to read a dictionary,
 * which becomes the trailer if it has a {@code Root} entry; a line starting with a digit is
 * parsed with {@code PdfTokenizer.checkObjectStart} and, when accepted, added to the table with
 * the position at which the line started.
 * <p>
 * NOTE(review): the loop exit, the {@code continue} paths and the body of the {@code catch}
 * block are not visible in this copy - confirm against the upstream source.
 *
 * @throws IOException declared by this method; presumably propagated from the line reads
 * @throws PdfException with {@code TRAILER_NOT_FOUND} when no trailer was found by the scan
 */
protected void rebuildXref() throws IOException {
xrefStm = false;
hybridXref = false;
rebuiltXref = true;
PdfXrefTable xref = pdfDocument.getXref();
trailer = null;
ByteBuffer buffer = new ByteBuffer(24);
// Second tokenizer layered over the line buffer, used to parse "num gen obj" headers.
PdfTokenizer lineTokenizer =
new PdfTokenizer(new RandomAccessFileOrArray(new ReusableRandomAccessSource(buffer)));
for (; ; ) {
// Position taken before the line is read, i.e. where the line starts.
long pos = tokens.getPosition();
// added boolean because of mailing list issue (17 Feb. 2014)
if (!tokens.readLineSegment(buffer, true))
// Candidate trailer line.
if (buffer.get(0) == 't') {
if (!PdfTokenizer.checkTrailer(buffer))
pos = tokens.getPosition();
try {
PdfDictionary dic = (PdfDictionary) readObject(false);
// Only a dictionary that has a Root entry is taken as the trailer.
if (dic.get(PdfName.Root, false) != null)
trailer = dic;
} catch (Exception e) {
// Candidate object header line (first byte is an ASCII digit).
} else if (buffer.get(0) >= '0' && buffer.get(0) <= '9') {
int[] obj = PdfTokenizer.checkObjectStart(lineTokenizer);
if (obj == null)
int num = obj[0];
int gen = obj[1];
// Add the object unless the table already has an entry with a higher generation number.
if (xref.get(num) == null || xref.get(num).getGenNumber() <= gen) {
xref.add(new PdfIndirectReference(pdfDocument, num, gen, pos));
if (trailer == null) {
throw new PdfException(KernelExceptionMessageConstant.TRAILER_NOT_FOUND);
/**
 * Converts the value of a {@code Prev} entry into a {@link PdfNumber}.
 * <p>
 * A {@code null} argument yields {@code null} and a number is returned as is. An indirect
 * reference is resolved with {@code getRefersTo(true)} only when
 * {@code StrictnessLevel.CONSERVATIVE.isStricter(getStrictnessLevel())} holds, and the resolved
 * object is returned if it is a number. Every remaining case ends in an
 * {@code InvalidXRefPrevException}.
 * <p>
 * NOTE(review): the argument of the exception is not visible in this copy.
 *
 * @param prevObjectToCheck the object stored under {@code Prev}, may be {@code null}
 * @return the number to use as the previous offset, or {@code null} if there is no entry
 */
protected PdfNumber getXrefPrev(PdfObject prevObjectToCheck) {
if (prevObjectToCheck == null) {
return null;
if (prevObjectToCheck.getType() == PdfObject.NUMBER) {
return (PdfNumber) prevObjectToCheck;
} else {
// An indirect Prev is tolerated only below the CONSERVATIVE strictness level.
if (prevObjectToCheck.getType() == PdfObject.INDIRECT_REFERENCE &&
StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
final PdfObject value = ((PdfIndirectReference) prevObjectToCheck).getRefersTo(true);
if (value != null && value.getType() == PdfObject.NUMBER) {
return (PdfNumber) value;
throw new InvalidXRefPrevException(
boolean isMemorySavingMode() {
return memorySavingMode;
/**
 * Handles an unexpected token met while reading an array.
 * <p>
 * Builds the {@code UNEXPECTED_TOKEN} message from the current token bytes decoded as UTF-8 and
 * then branches on {@code StrictnessLevel.CONSERVATIVE.isStricter(getStrictnessLevel())}: the
 * branch taken when that holds obtains a logger for {@code PdfReader}.
 * <p>
 * NOTE(review): what is done with the logger and the body of the {@code else} branch are not
 * visible in this copy - presumably the message is logged in one case and raised as an error in
 * the other; confirm against the upstream source.
 */
private void processArrayReadError() {
final String error = MessageFormatUtil.format(KernelExceptionMessageConstant.UNEXPECTED_TOKEN,
new String(tokens.getByteContent(), StandardCharsets.UTF_8));
if (StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
final Logger logger = LoggerFactory.getLogger(PdfReader.class);
} else {
/**
 * Sets up decryption from the trailer's {@code Encrypt} dictionary.
 * <p>
 * The {@code encrypted} flag is set and {@code decrypt} is created according to the dictionary's
 * {@code Filter}: {@code Adobe_PubSec} uses the certificate data from {@code properties},
 * {@code Standard} uses the password from {@code properties} together with
 * {@code getOriginalFileId()}, and any other filter raises
 * {@code UnsupportedSecurityHandlerException}.
 * <p>
 * NOTE(review): the statements following the two leading checks and the argument of the
 * {@code PdfException} are not visible in this copy - presumably the checks return early;
 * confirm against the upstream source.
 */
private void readDecryptObj() {
if (encrypted)
PdfDictionary enc = trailer.getAsDictionary(PdfName.Encrypt);
if (enc == null)
encrypted = true;
PdfName filter = enc.getAsName(PdfName.Filter);
if (PdfName.Adobe_PubSec.equals(filter)) {
// The public-key handler cannot work without a certificate.
if (properties.certificate == null) {
throw new PdfException(
decrypt = new PdfEncryption(enc, properties.certificateKey, properties.certificate,
properties.certificateKeyProvider, properties.externalDecryptionProcess);
} else if (PdfName.Standard.equals(filter)) {
decrypt = new PdfEncryption(enc, properties.password, getOriginalFileId());
} else {
throw new UnsupportedSecurityHandlerException(MessageFormatUtil.format(UnsupportedSecurityHandlerException.UnsupportedSecurityHandler, filter));
/**
 * Resolves an indirect reference to the object it points to.
 * <p>
 * A {@code null} reference yields {@code null}, and a reference whose {@code refersTo} is already
 * set returns that object. Otherwise the reference becomes {@code currentIndirectReference} and
 * is handled by kind: one with an object stream number greater than 0 involves that object
 * stream, one with an offset greater than 0 is read with {@code readObject(false)} after the
 * object header has been compared with the reference, and anything else yields {@code null}.
 * <p>
 * NOTE(review): the seek, the object-stream read, the call made before the retry and the head of
 * the {@code throw} are not visible in this copy - confirm against the upstream source.
 *
 * @param reference the reference to resolve, may be {@code null}
 * @param fixXref   whether a {@code RuntimeException} raised while reading at the recorded
 *                  offset may be answered with one retry (made with this flag set to
 *                  {@code false}) instead of being rethrown
 * @return the resolved object, or {@code null} if there is nothing to read
 * @throws PdfException with {@code CANNOT_READ_PDF_OBJECT} wrapping any {@code IOException}
 */
private PdfObject readObject(PdfIndirectReference reference, boolean fixXref) {
if (reference == null)
return null;
if (reference.refersTo != null)
return reference.refersTo;
try {
currentIndirectReference = reference;
if (reference.getObjStreamNumber() > 0) {
PdfStream objectStream = (PdfStream) pdfDocument.getXref().
return reference.refersTo;
} else if (reference.getOffset() > 0) {
PdfObject object;
try {
// The header found at the offset must carry the same object and generation numbers as the reference.
if (tokens.getTokenType() != PdfTokenizer.TokenType.Obj
|| tokens.getObjNr() != reference.getObjNumber()
|| tokens.getGenNr() != reference.getGenNumber()) {
KernelExceptionMessageConstant.INVALID_OFFSET_FOR_THIS_OBJECT, reference.toString());
object = readObject(false);
} catch (RuntimeException ex) {
// Retry once, only if allowed and only for objects not stored in an object stream.
if (fixXref && reference.getObjStreamNumber() == 0) {
object = readObject(reference, false);
} else {
throw ex;
return object != null ? object.setIndirectReference(reference) : null;
} else {
return null;
} catch (IOException e) {
throw new PdfException(KernelExceptionMessageConstant.CANNOT_READ_PDF_OBJECT, e);
/**
 * Verifies the {@code Length} entry of a stream and recomputes the length when it cannot be
 * trusted.
 * <p>
 * The declared length is suspect when {@code start + length} lies within the last 20 bytes of
 * the file or beyond it, or when the 20 characters read at {@code start + length} do not begin
 * with one of the {@code endstream1..4} markers. A missing {@code Length} is replaced by a new
 * {@code PdfNumber(0)} stored in the stream and is also treated as suspect. In the suspect case
 * the input is scanned line by line for {@code endstream} or {@code endobj} to locate the real
 * end of the data.
 * <p>
 * NOTE(review): the early return for {@code correctStreamLength == false}, the seek before the
 * scan, the loop exits and the final update of the length are not visible in this copy -
 * confirm against the upstream source.
 *
 * @param pdfStream the stream whose length is checked; its {@code getOffset()} is taken as the
 *                  start of the stream data
 * @throws IOException declared by this method; presumably propagated from the tokenizer reads
 * @throws PdfException with {@code STREAM_SHALL_END_WITH_ENDSTREAM} when the input ends during
 *                      the scan and {@code CONSERVATIVE} is not stricter than the reader's level
 */
private void checkPdfStreamLength(PdfStream pdfStream) throws IOException {
if (!correctStreamLength)
long fileLength = tokens.length();
long start = pdfStream.getOffset();
// calc becomes true when the length has to be recomputed by scanning.
boolean calc = false;
int streamLength = 0;
PdfNumber pdfNumber = pdfStream.getAsNumber(PdfName.Length);
if (pdfNumber != null) {
streamLength = pdfNumber.intValue();
// Declared end too close to (or past) the end of the file.
if (streamLength + start > fileLength - 20) {
calc = true;
} else {
// Look at what follows the declared end; it must start with an endstream marker.
tokens.seek(start + streamLength);
String line = tokens.readString(20);
if (!line.startsWith(endstream2) && !line.startsWith(endstream3) &&
!line.startsWith(endstream4) && !line.startsWith(endstream1)) {
calc = true;
} else {
// No Length entry at all: store 0 and fall back to scanning.
pdfNumber = new PdfNumber(0);
pdfStream.put(PdfName.Length, pdfNumber);
calc = true;
if (calc) {
ByteBuffer line = new ByteBuffer(16);
long pos;
while (true) {
// Position where the line about to be read starts.
pos = tokens.getPosition();
// added boolean because of mailing list issue (17 Feb. 2014)
if (!tokens.readLineSegment(line, false)) {
if (!StrictnessLevel.CONSERVATIVE.isStricter(this.strictnessLevel)) {
throw new PdfException(KernelExceptionMessageConstant.STREAM_SHALL_END_WITH_ENDSTREAM);
if (line.startsWith(endstream)) {
} else if (line.startsWith(endobj)) {
// endobj without a preceding endstream line: search the 16 characters before it for the
// endstream1 marker and, if found, move pos to where that marker begins.
tokens.seek(pos - 16);
String s = tokens.readString(16);
int index = s.indexOf(endstream1);
if (index >= 0)
pos = pos - 16 + index;
streamLength = (int) (pos - start);
// Inspect the bytes just before pos for CR (13) and LF (10).
tokens.seek(pos - 2);
if (tokens.read() == 13) {
tokens.seek(pos - 1);
if (tokens.read() == 10) {
private PdfObject createPdfNullInstance(boolean readAsDirect) {
if (readAsDirect) {
return PdfNull.PDF_NULL;
} else {
return new PdfNull();
/**
 * Utility method that checks the provided byte source to see if it has junk bytes at the beginning. If junk bytes
 * are found, construct a tokeniser that ignores the junk. Otherwise, construct a tokeniser for the byte source as it is.
 *
 * @param byteSource the source to check
 * @return a tokeniser that is guaranteed to start at the PDF header
 * @throws IOException if there is a problem reading the byte source
 */
// Builds a tokenizer over byteSource and asks it for the header offset. A non-zero offset makes
// the method wrap byteSource in a WindowRandomAccessSource that starts at that offset and return
// a tokenizer over the wrapper instead.
// An exception raised while the header offset is determined is rethrown; closeStream is
// consulted first.
// NOTE(review): the statement guarded by closeStream is not visible in this copy - presumably
// the tokenizer is closed there; confirm against the upstream source.
private static PdfTokenizer getOffsetTokeniser(IRandomAccessSource byteSource, boolean closeStream)
throws IOException {
PdfTokenizer tok = new PdfTokenizer(new RandomAccessFileOrArray(byteSource));
int offset;
try {
offset = tok.getHeaderOffset();
} catch (com.itextpdf.io.exceptions.IOException ex) {
if (closeStream) {
throw ex;
// Bytes precede the header: re-create the tokenizer over a window that skips them.
if (offset != 0) {
IRandomAccessSource offsetSource = new WindowRandomAccessSource(byteSource, offset);
tok = new PdfTokenizer(new RandomAccessFileOrArray(offsetSource));
return tok;
protected static class ReusableRandomAccessSource implements IRandomAccessSource {
private ByteBuffer buffer;
public ReusableRandomAccessSource(ByteBuffer buffer) {
if (buffer == null) throw new IllegalArgumentException("Passed byte buffer can not be null.");
this.buffer = buffer;
public int get(long offset) {
if (offset >= buffer.size()) return -1;
return 0xff & buffer.getInternalBuffer()[(int) offset];
public int get(long offset, byte[] bytes, int off, int len) {
if (buffer == null) throw new IllegalStateException("Already closed");
if (offset >= buffer.size())
return -1;
if (offset + len > buffer.size())
len = (int) (buffer.size() - offset);
System.arraycopy(buffer.getInternalBuffer(), (int) offset, bytes, off, len);
return len;
public long length() {
return buffer.size();
public void close() {
buffer = null;
/**
 * Enumeration representing the strictness level for reading.
 */
public enum StrictnessLevel {
/**
 * The reading strictness level at which iText fails (throws an exception) in case of
 * a contradiction with the PDF specification, but still recovers from mild parsing errors
 * and ambiguities.
 */
/**
 * The reading strictness level at which iText tries to recover from parsing
 * errors if possible.
 */
private final int levelValue;
StrictnessLevel(int levelValue) {
this.levelValue = levelValue;
/**
 * Checks whether the current instance represents a stricter reading level than
 * the provided one. Note that {@code null} is less strict than any other value.
 *
 * @param compareWith the {@link StrictnessLevel} to compare with
 * @return {@code true} if the current level is stricter than the provided one
 */
public boolean isStricter(StrictnessLevel compareWith) {
return compareWith == null || this.levelValue > compareWith.levelValue;